nixpkgs/pkgs/development/python-modules/scrapy/default.nix
Artturin e0464e4788 treewide: replace stdenv.is with stdenv.hostPlatform.is
In preparation for the deprecation of `stdenv.isX`.

These shorthands are not conducive to cross-compilation because they
hide the platforms.

Darwin might get cross-compilation, for which the continued usage of `stdenv.isDarwin` will get in the way.

One example of why this is bad, and which especially affects compiler packages:
https://www.github.com/NixOS/nixpkgs/pull/343059

There are too many files to go through manually but a treewide should
get users thinking when they see a `hostPlatform.isX` in a place where it
doesn't make sense.

```
fd --type f "\.nix" | xargs sd --fixed-strings "stdenv.is" "stdenv.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "stdenv'.is" "stdenv'.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "clangStdenv.is" "clangStdenv.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "gccStdenv.is" "gccStdenv.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "stdenvNoCC.is" "stdenvNoCC.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "inherit (stdenv) is" "inherit (stdenv.hostPlatform) is"
fd --type f "\.nix" | xargs sd --fixed-strings "buildStdenv.is" "buildStdenv.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "effectiveStdenv.is" "effectiveStdenv.hostPlatform.is"
fd --type f "\.nix" | xargs sd --fixed-strings "originalStdenv.is" "originalStdenv.hostPlatform.is"
```
2024-09-25 00:04:37 +03:00

156 lines
3.3 KiB
Nix

# Scrapy (web crawling / scraping framework) packaged as a Python module.
#
# The function takes its inputs (Python libraries, fetchers, hooks) as
# arguments and returns the result of `buildPythonPackage`.
{
  lib,
  stdenv,
  botocore,
  buildPythonPackage,
  cryptography,
  cssselect,
  defusedxml,
  fetchFromGitHub,
  glibcLocales,
  installShellFiles,
  itemadapter,
  itemloaders,
  jmespath,
  lxml,
  packaging,
  parsel,
  pexpect,
  protego,
  pydispatcher,
  pyopenssl,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  queuelib,
  service-identity,
  setuptools,
  sybil,
  testfixtures,
  tldextract,
  twisted,
  uvloop,
  w3lib,
  zope-interface,
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.2";
  pyproject = true;

  # Not built for Python interpreters older than 3.8.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "scrapy";
    repo = "scrapy";
    rev = "refs/tags/${version}";
    hash = "sha256-EaO1kQ3VSTwEW+r0kSKycOxHNTPwwCVjch1ZBrTU0qQ=";
  };

  # Drop the upstream version constraint on these requirements.
  pythonRelaxDeps = [
    "defusedxml"
  ];

  # Provides installManPage / installShellCompletion used in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  # PEP 517 build backend requirements.
  build-system = [
    setuptools
  ];

  # Runtime Python dependencies.
  dependencies = [
    cryptography
    cssselect
    defusedxml
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytest-xdist
    pytestCheckHook
    sybil
    testfixtures
    uvloop
  ];

  # Exported into the build environment; glibcLocales above presumably
  # supplies this locale for the test run.
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests =
    [
      # Requires network access
      "AnonymousFTPTestCase"
      "FTPFeedStorageTest"
      "FeedExportTest"
      "test_custom_asyncio_loop_enabled_true"
      "test_custom_loop_asyncio"
      "test_custom_loop_asyncio_deferred_signal"
      "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
      "test_persist"
      "test_timeout_download_from_spider_nodata_rcvd"
      "test_timeout_download_from_spider_server_hangs"
      "test_unbounded_response"
      "CookiesMiddlewareTest"
      # Test fails on Hydra
      "test_start_requests_laziness"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      "test_xmliter_encoding"
      "test_download"
      "test_reactor_default_twisted_reactor_select"
      "URIParamsSettingTest"
      "URIParamsFeedOptionTest"
      # flaky on darwin-aarch64
      "test_fixed_delay"
      # "test_start_requests_laziness" is already disabled on every platform
      # in the list above, so it is not repeated here.
    ];

  # Install the man page and shell completions shipped in the source's
  # extras/ directory.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  __darwinAllowLocalNetworking = true;

  # Attributes are referenced through `lib.` explicitly rather than
  # `with lib;`, so no unrelated `lib` names are pulled into scope.
  meta = {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ vinnymeller ];
  };
}