[python] PyPI API constraints implementation (PY-78109)

* All URLs which respond with an HTML5 page MUST end with a / and the repository SHOULD redirect the URLs without a / to add a / to the end.

* Repositories MAY redirect unnormalized URLs to the canonical normalized URL (e.g. /Foobar/ may redirect to /foobar/), however clients MUST NOT rely on this redirection and MUST request the normalized URL.

https://packaging.python.org/en/latest/specifications/simple-repository-api/#base-html-api


(cherry picked from commit c90a3779344521428b03fd361b6e8daa349138c3)

IJ-MR-152631

GitOrigin-RevId: ef2dff1ccc18b8a200e69e1ab9843717d29d5fd3
This commit is contained in:
Vitaly Legchilkin
2025-01-09 21:32:01 +01:00
committed by intellij-monorepo-bot
parent b78d31a7fd
commit e2d66c37e4
3 changed files with 85 additions and 14 deletions

View File

@@ -40,13 +40,23 @@ import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import static com.jetbrains.python.packaging.common.PackagesKt.normalizePackageName;
public class PyPIPackageUtil {
private static final Logger LOG = Logger.getInstance(PyPIPackageUtil.class);
private static final Gson GSON = new GsonBuilder().create();
private static final String PYPI_HOST = "https://pypi.python.org";
public static final String PYPI_URL = PYPI_HOST + "/pypi";
public static final String PYPI_LIST_URL = PYPI_HOST + "/simple";
private static final String PYPI_BASE_URL = "https://pypi.org";
private static final String PYPI_SIMPLE_REPOSITORY_API = "/simple/";
private static final String PYPI_DETAILS_API = "/pypi/";
private static final String PYPI_PROJECT_API = "/project/";
/**
* @deprecated Use {@link #buildDetailsUrl(String, String)} instead
*/
@Deprecated
public static final String PYPI_URL = PYPI_BASE_URL + "/pypi";
public static final String PYPI_LIST_URL = PYPI_BASE_URL + PYPI_SIMPLE_REPOSITORY_API;
public static final PyPIPackageUtil INSTANCE = new PyPIPackageUtil();
@@ -63,8 +73,7 @@ public class PyPIPackageUtil {
final List<String> repositories = PyPackageService.getInstance().additionalRepositories;
for (String repository : repositories) {
try {
final String packageUrl = StringUtil.trimEnd(repository, "/") + "/" + key;
final List<String> versions = parsePackageVersionsFromArchives(packageUrl, key);
final List<String> versions = parsePackageVersionsFromRepository(repository, key);
if (!versions.isEmpty()) {
LOG.debug("Found versions " + versions + "of " + key + " at " + repository);
return Collections.unmodifiableList(versions);
@@ -101,7 +110,8 @@ public class PyPIPackageUtil {
/**
 * Loads the details of the package named {@code key}: logs the attempt, requests the
 * package details URL with the user agent from {@code getUserAgent()}, and deserializes
 * the response reader into {@link PackageDetails} with {@code GSON}.
 *
 * NOTE(review): this hunk shows the request line built from {@code PYPI_URL} next to the
 * one built with {@code buildDetailsUrl(PYPI_LIST_URL, key)} — presumably the former is
 * the pre-change line of the diff; confirm against the actual file.
 */
@Override
public PackageDetails load(@NotNull String key) throws Exception {
LOG.debug("Fetching details for the package '" + key + "' on PyPI");
return HttpRequests.request(PYPI_URL + "/" + key + "/json")
// The details URL is derived from the simple-repository list URL and the package name.
final String detailsUrl = buildDetailsUrl(PYPI_LIST_URL, key);
return HttpRequests.request(detailsUrl)
.userAgent(getUserAgent())
.connect(request -> GSON.fromJson(request.getReader(), PackageDetails.class));
}
@@ -122,14 +132,14 @@ public class PyPIPackageUtil {
}
/**
 * Tells whether {@code repository} is non-null and starts with the PyPI host prefix.
 *
 * NOTE(review): two return lines are listed here, one checking {@code PYPI_HOST} and one
 * checking {@code PYPI_BASE_URL} — presumably the old and the new side of the diff;
 * confirm which one the actual file contains.
 *
 * @param repository repository URL to test, may be {@code null}
 * @return {@code true} only for a non-null URL with the expected prefix
 */
public static boolean isPyPIRepository(@Nullable String repository) {
return repository != null && repository.startsWith(PYPI_HOST);
return repository != null && repository.startsWith(PYPI_BASE_URL);
}
/**
 * Flattens the values of the map returned by
 * {@code myAdditionalPackages.getAllPresent(repositories)} into a single list.
 *
 * @param repositories repository URLs to look up
 * @return all packages found for the given repositories, concatenated into one list
 */
public @NotNull List<RepoPackage> getAdditionalPackages(@NotNull List<String> repositories) {
  final var presentByRepository = myAdditionalPackages.getAllPresent(repositories);
  return StreamEx.of(presentByRepository.values())
    .flatMap(StreamEx::of)
    .toList();
}
public void loadAdditionalPackages(@NotNull List<String> repositories, boolean alwaysRefresh) throws IOException {
public void loadAdditionalPackages(@NotNull List<String> repositories, boolean alwaysRefresh) {
var failedToConnect = new ArrayList<String>();
if (alwaysRefresh) {
for (String url : repositories) {
@@ -261,6 +271,66 @@ public class PyPIPackageUtil {
return version;
}
/**
 * Makes sure a repository URL is slash-terminated and points at a Simple Repository API root.
 *
 * @param repositoryUrl repository URL, with or without a trailing "/"
 * @return the URL unchanged when it already ends with "/", otherwise the URL with "/" appended
 * @throws IllegalArgumentException if the slash-terminated URL does not end with
 *                                  {@code PYPI_SIMPLE_REPOSITORY_API}
 */
private static String normalizeRepositoryUrl(@NotNull String repositoryUrl) throws IllegalArgumentException {
  String slashTerminated = repositoryUrl;
  if (!slashTerminated.endsWith("/")) {
    slashTerminated = slashTerminated + "/";
  }
  if (slashTerminated.endsWith(PYPI_SIMPLE_REPOSITORY_API)) {
    return slashTerminated;
  }
  throw new IllegalArgumentException("The Repository URL must end with " + PYPI_SIMPLE_REPOSITORY_API);
}
/**
 * Builds the Simple Repository API URL of a package page:
 * {@code <normalized repository URL><normalized package name>/}.
 * <p>
 * The Simple Repository API places these constraints on clients:
 * <ul>
 * <li> Every URL that responds with an HTML5 page MUST end with a /, and
 * the repository SHOULD redirect URLs without a trailing / to the variant that has one.
 * <li> Repositories MAY redirect unnormalized URLs to the canonical normalized URL
 * (e.g. /Foobar/ may redirect to /foobar/), but clients MUST NOT rely on that redirect
 * and MUST request the normalized URL themselves.
 * </ul>
 *
 * @param repositoryUrl repository URL; it is passed through {@code normalizeRepositoryUrl}
 * @param packageName   package name; it is passed through {@code normalizePackageName}
 * @return the package page URL, always ending with "/"
 * @throws IllegalArgumentException if {@code normalizeRepositoryUrl} rejects the repository URL
 * @see <a href="https://packaging.python.org/en/latest/specifications/simple-repository-api/#base-html-api">Base HTML API</a>
 * @see <a href="https://packaging.python.org/en/latest/specifications/simple-repository-api/#normalized-names">Normalized Names</a>
 */
public static @NotNull String buildPackageUrl(@NotNull String repositoryUrl, @NotNull String packageName) {
  // The package name is normalized before the repository URL, as in the original ordering.
  final String name = normalizePackageName(packageName);
  final String repositoryRoot = normalizeRepositoryUrl(repositoryUrl);
  return repositoryRoot + name + "/";
}
/**
 * Builds the project page URL for a package: {@code PYPI_BASE_URL + PYPI_PROJECT_API + packageName + "/"}.
 * <p>
 * The Project API uses non-normalized names: /project/Flask is the correct form, and
 * GET /project/flask is redirected (HTTP 301) to /project/Flask. The package name is
 * therefore inserted as given, without normalization.
 *
 * @param packageName package name, used verbatim
 * @return the project page URL, ending with "/"
 */
public static @NotNull String buildProjectUrl(@NotNull String packageName) {
  return PYPI_BASE_URL + PYPI_PROJECT_API + packageName + "/";
}
/**
 * Builds the JSON details URL of a package from a simple-repository URL:
 * the trailing {@code PYPI_SIMPLE_REPOSITORY_API} segment of the normalized repository URL
 * is replaced with {@code PYPI_DETAILS_API}, followed by the normalized package name and "/json".
 * <p>
 * The Details API uses normalized names and answers non-normalized ones with an HTTP 301 redirect.
 *
 * @param repositoryUrl repository URL; it is passed through {@code normalizeRepositoryUrl}
 * @param packageName   package name; it is passed through {@code normalizePackageName}
 * @return the details URL ending with "/json"
 * @throws IllegalArgumentException if {@code normalizeRepositoryUrl} rejects the repository URL
 */
public static @NotNull String buildDetailsUrl(@NotNull String repositoryUrl,
                                              @NotNull String packageName) throws IllegalArgumentException {
  // The repository URL is validated before the package name is normalized, as in the original ordering.
  final String repositoryRoot = normalizeRepositoryUrl(repositoryUrl);
  final String name = normalizePackageName(packageName);
  return StringUtil.trimEnd(repositoryRoot, PYPI_SIMPLE_REPOSITORY_API) + PYPI_DETAILS_API + name + "/json";
}
/**
 * Resolves the package page URL with {@code buildPackageUrl} and delegates to
 * {@code parsePackageVersionsFromArchives} to obtain the versions listed there.
 *
 * @param repositoryUrl repository URL the package page URL is built from
 * @param packageName   name of the package to look up
 * @return whatever {@code parsePackageVersionsFromArchives} returns for the package page
 * @throws IOException declared because the delegate declares it
 */
public static @NotNull List<String> parsePackageVersionsFromRepository(@NotNull String repositoryUrl,
                                                                       @NotNull String packageName) throws IOException {
  return parsePackageVersionsFromArchives(buildPackageUrl(repositoryUrl, packageName), packageName);
}
/**
* @deprecated Use {@link #parsePackageVersionsFromRepository} instead
*/
@Deprecated
public static @NotNull List<String> parsePackageVersionsFromArchives(@NotNull String archivesUrl,
@NotNull String packageName) throws IOException {
return HttpRequests.request(archivesUrl).userAgent(getUserAgent()).connect(request -> {

View File

@@ -37,8 +37,8 @@ abstract class PipBasedRepositoryManager(project: Project, sdk: Sdk) : PythonRep
val repositoryUrl = it.repository?.repositoryUrl ?: PyPIPackageRepository.repositoryUrl!!
val result = runCatching {
val packageUrl = repositoryUrl.replace("simple", "pypi/${it.name}/json")
HttpRequests.request(packageUrl)
val packageDetailsUrl = PyPIPackageUtil.buildDetailsUrl(repositoryUrl, it.name)
HttpRequests.request(packageDetailsUrl)
.withBasicAuthorization(it.repository)
.readTimeout(3000)
.readString()
@@ -101,10 +101,10 @@ abstract class PipBasedRepositoryManager(project: Project, sdk: Sdk) : PythonRep
}
/**
 * Tries to read the versions of package [name] from a repository page.
 *
 * Falls back to `PyPIPackageRepository.repositoryUrl` when [repositoryUrl] is null.
 * Any exception thrown inside the `runCatching` block is turned into a `null` result.
 *
 * NOTE(review): this hunk lists both the `actualUrl`/`parsePackageVersionsFromArchives`
 * lines and the `actualRepositoryUrl`/`parsePackageVersionsFromRepository` lines —
 * presumably the old and the new side of the diff; confirm against the actual file.
 *
 * @return the parsed versions, or `null` when the lookup fails
 */
private fun tryParsingVersionsFromPage(name: String, repositoryUrl: String?): List<String>? {
val actualUrl = repositoryUrl ?: PyPIPackageRepository.repositoryUrl!!
// Fails with an explicit message instead of `!!` when no repository URL can be resolved.
val actualRepositoryUrl = repositoryUrl ?: PyPIPackageRepository.repositoryUrl
?: error("Can't resolve repository url for $name")
val versions = runCatching {
val url = StringUtil.trimEnd(actualUrl, "/") + "/" + name
PyPIPackageUtil.parsePackageVersionsFromArchives(url, name)
PyPIPackageUtil.parsePackageVersionsFromRepository(actualRepositoryUrl, name)
}
return versions.getOrNull()
}

View File

@@ -4,6 +4,7 @@ package com.jetbrains.python.packaging.repository
import com.intellij.util.io.HttpRequests
import com.intellij.util.io.RequestBuilder
import com.jetbrains.python.packaging.PyPIPackageUtil
import org.jetbrains.annotations.ApiStatus
import java.net.URLEncoder
import java.nio.charset.StandardCharsets
@@ -39,4 +40,4 @@ internal fun encodeCredentialsForUrl(login: String, password: String): String {
/**
 * Singleton repository named "empty repository" whose remaining two constructor arguments are empty strings.
 * NOTE(review): presumably the second argument is the repository URL — confirm against `PyPackageRepository`.
 */
object PyEmptyPackagePackageRepository : PyPackageRepository("empty repository", "", "")
/**
 * Singleton repository named "PyPI".
 *
 * NOTE(review): two declarations are listed here, one with the literal
 * "https://pypi.python.org/simple" and one with `PyPIPackageUtil.PYPI_LIST_URL` —
 * presumably the old and the new side of the diff; confirm against the actual file.
 */
@ApiStatus.Experimental
object PyPIPackageRepository : PyPackageRepository("PyPI", "https://pypi.python.org/simple", "")
object PyPIPackageRepository : PyPackageRepository("PyPI", PyPIPackageUtil.PYPI_LIST_URL, "")