Skip to content

Some ICU4X 2.1 crates are published without dev-dependencies in Cargo.toml #7196

@musicinmybrain

Description

@musicinmybrain

In Fedora, we package each Rust crate library as a separate distribution package. We package a subset of ICU4X. I was starting to look at the updates associated with the ICU4X 2.1 release, and I noticed that the dev-dependencies are missing from Cargo.toml (both the normalized version and Cargo.toml.orig) in some of the published crates. Consider zerovec:

$ git clone https://github.com/unicode-org/icu4x.git
$ cd icu4x
$ curl -L https://crates.io/api/v1/crates/zerovec/0.11.5/download -o zerovec-0.11.5.crate
$ tar -xzf zerovec-0.11.5.crate
$ cat zerovec-0.11.5/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
rust-version = "1.82"
name = "zerovec"
version = "0.11.5"
authors = ["The ICU4X Project Developers"]
build = false
include = [
    "data/**/*",
    "src/**/*",
    "examples/**/*",
    "benches/**/*",
    "tests/**/*",
    "Cargo.toml",
    "LICENSE",
    "README.md",
    "build.rs",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Zero-copy vector backed by a byte array"
readme = "README.md"
keywords = [
    "zerocopy",
    "serialization",
    "zero-copy",
    "serde",
]
categories = [
    "rust-patterns",
    "memory-management",
    "caching",
    "no-std",
    "data-structures",
]
license = "Unicode-3.0"
repository = "https://github.com/unicode-org/icu4x"

[package.metadata.workspaces]
independent = true

[package.metadata.docs.rs]
all-features = true

[package.metadata.cargo-all-features]
max_combination_size = 3

[features]
alloc = ["serde?/alloc"]
databake = ["dep:databake"]
derive = ["dep:zerovec-derive"]
hashmap = [
    "dep:twox-hash",
    "alloc",
]
serde = ["dep:serde"]
std = []
yoke = ["dep:yoke"]

[lib]
name = "zerovec"
path = "src/lib.rs"
bench = false

[[example]]
name = "zv_serde"
path = "examples/zv_serde.rs"
required-features = ["serde"]

[[bench]]
name = "vzv"
path = "benches/vzv.rs"
harness = false

[[bench]]
name = "zeromap"
path = "benches/zeromap.rs"
harness = false
required-features = [
    "serde",
    "hashmap",
    "derive",
]

[[bench]]
name = "zerovec"
path = "benches/zerovec.rs"
harness = false

[[bench]]
name = "zerovec_iai"
path = "benches/zerovec_iai.rs"
harness = false

[[bench]]
name = "zerovec_serde"
path = "benches/zerovec_serde.rs"
harness = false
required-features = ["serde"]

[dependencies.databake]
version = "0.2.0"
features = ["derive"]
optional = true
default-features = false

[dependencies.serde]
version = "1.0.220"
features = ["derive"]
optional = true
default-features = false

[dependencies.twox-hash]
version = "2.0.0"
features = ["xxhash64"]
optional = true
default-features = false

[dependencies.yoke]
version = "0.8.0"
optional = true
default-features = false

[dependencies.zerofrom]
version = "0.1.3"
default-features = false

[dependencies.zerovec-derive]
version = "0.11.1"
optional = true
default-features = false

Here I’ve displayed Cargo.toml, but Cargo.toml.orig also lacks the dev-dependencies.

This wasn’t the case in the previous release:

$ curl -L https://crates.io/api/v1/crates/zerovec/0.11.4/download -o zerovec-0.11.4.crate
$ tar -xzf zerovec-0.11.4.crate
$ cat zerovec-0.11.4/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
rust-version = "1.82"
name = "zerovec"
version = "0.11.4"
authors = ["The ICU4X Project Developers"]
build = false
include = [
    "data/**/*",
    "src/**/*",
    "examples/**/*",
    "benches/**/*",
    "tests/**/*",
    "Cargo.toml",
    "LICENSE",
    "README.md",
    "build.rs",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Zero-copy vector backed by a byte array"
readme = "README.md"
keywords = [
    "zerocopy",
    "serialization",
    "zero-copy",
    "serde",
]
categories = [
    "rust-patterns",
    "memory-management",
    "caching",
    "no-std",
    "data-structures",
]
license = "Unicode-3.0"
repository = "https://github.com/unicode-org/icu4x"

[package.metadata.cargo-all-features]
max_combination_size = 3

[package.metadata.docs.rs]
all-features = true

[package.metadata.workspaces]
independent = true

[features]
alloc = []
databake = ["dep:databake"]
derive = ["dep:zerovec-derive"]
hashmap = [
    "dep:twox-hash",
    "alloc",
]
serde = [
    "dep:serde",
    "alloc",
]
std = []
yoke = ["dep:yoke"]

[lib]
name = "zerovec"
path = "src/lib.rs"
bench = false

[[example]]
name = "zv_serde"
path = "examples/zv_serde.rs"
required-features = ["serde"]

[[bench]]
name = "vzv"
path = "benches/vzv.rs"
harness = false

[[bench]]
name = "zeromap"
path = "benches/zeromap.rs"
harness = false
required-features = [
    "serde",
    "hashmap",
    "derive",
]

[[bench]]
name = "zerovec"
path = "benches/zerovec.rs"
harness = false

[[bench]]
name = "zerovec_iai"
path = "benches/zerovec_iai.rs"
harness = false

[[bench]]
name = "zerovec_serde"
path = "benches/zerovec_serde.rs"
harness = false
required-features = ["serde"]

[dependencies.databake]
version = "0.2.0"
features = ["derive"]
optional = true
default-features = false

[dependencies.serde]
version = "1.0.110"
features = [
    "alloc",
    "derive",
]
optional = true
default-features = false

[dependencies.twox-hash]
version = "2.0.0"
features = ["xxhash64"]
optional = true
default-features = false

[dependencies.yoke]
version = "0.8.0"
optional = true
default-features = false

[dependencies.zerofrom]
version = "0.1.3"
default-features = false

[dependencies.zerovec-derive]
version = "0.11.1"
optional = true
default-features = false

[dev-dependencies.bincode]
version = "1.3.1"

[dev-dependencies.getrandom]
version = "0.3"
features = ["wasm_js"]

[dev-dependencies.iai]
version = "0.1.1"

[dev-dependencies.postcard]
version = "1.0.3"
features = ["use-std"]
default-features = false

[dev-dependencies.rand]
version = "0.9"

[dev-dependencies.rand_distr]
version = "0.5"

[dev-dependencies.rand_pcg]
version = "0.9"

[dev-dependencies.rmp-serde]
version = "1.2.0"

[dev-dependencies.serde]
version = "1.0.110"
features = ["derive"]
default-features = false

[dev-dependencies.serde_json]
version = "1.0.45"

[dev-dependencies.yoke]
version = "0.8.0"
features = ["derive"]
default-features = false

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion]
version = "0.5.0"

I tried checking against the git commit that was used to publish the crate.

$ cat zerovec-0.11.5/.cargo_vcs_info.json
{
  "git": {
    "sha1": "29dfe2790b6cfdab94ca6a6b69f58ce54802dbf7",
    "dirty": true
  },
  "path_in_vcs": "utils/zerovec"
}
$ git checkout 29dfe2790b6cfdab94ca6a6b69f58ce54802dbf7
$ cat utils/zerovec/Cargo.toml
# This file is part of ICU4X. For terms of use, please see the file
# called LICENSE at the top level of the ICU4X source tree
# (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

[package]
name = "zerovec"
description = "Zero-copy vector backed by a byte array"
version = "0.11.5"
categories = ["rust-patterns", "memory-management", "caching", "no-std", "data-structures"]
keywords = ["zerocopy", "serialization", "zero-copy", "serde"]

authors.workspace = true
edition.workspace = true
include.workspace = true
license.workspace = true
repository.workspace = true
rust-version = "1.82"

[package.metadata.workspaces]
independent = true

[package.metadata.docs.rs]
all-features = true

[dependencies]
zerofrom = { workspace = true }

zerovec-derive = { workspace = true, optional = true}

databake = { workspace = true, features = ["derive"], optional = true }
serde = { workspace = true, features = ["derive"], optional = true }

yoke = { workspace = true, optional = true }
twox-hash = { workspace = true, optional = true }

[dev-dependencies]
bincode = { workspace = true }
getrandom = { workspace = true, features = ["wasm_js"] }
iai = { workspace = true }
icu_benchmark_macros = { path = "../../tools/benchmark/macros" }
postcard = { workspace = true, features = ["use-std"] }
rand = { workspace = true }
rand_distr = { workspace = true }
rand_pcg = { workspace = true }
rmp-serde = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
potential_utf = { path = "../../utils/potential_utf", features = ["zerovec"] }
yoke = { workspace = true, features = ["derive"] }
zerofrom = { path = "../../utils/zerofrom", features = ["derive"] }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = { workspace = true }

[features]
derive = ["dep:zerovec-derive"]
hashmap = ["dep:twox-hash", "alloc"]
yoke = ["dep:yoke"]
serde = ["dep:serde"]
databake = ["dep:databake"]
alloc = ["serde?/alloc"]
# No longer does anything
std = []

[package.metadata.cargo-all-features]
# We have tons of features here, limit the amount of tests we run
max_combination_size = 3

[lib]
bench = false  # This option is required for Benchmark CI

[[bench]]
name = "zerovec"
harness = false

[[bench]]
name = "zerovec_serde"
harness = false
required-features = ["serde"]

[[bench]]
name = "vzv"
harness = false

[[bench]]
name = "zerovec_iai"
harness = false

[[bench]]
name = "zeromap"
harness = false
required-features = ["serde", "hashmap", "derive"]

[[example]]
name = "zv_serde"
required-features = ["serde"]

That looks fine. Next, I tried packaging the crate myself from that commit with a cargo publish dry run:

$ cd utils/zerovec
$ cargo publish --dry-run
$ tar -xzf ../../target/package/zerovec-0.11.5.crate
$ cat zerovec-0.11.5/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
rust-version = "1.82"
name = "zerovec"
version = "0.11.5"
authors = ["The ICU4X Project Developers"]
build = false
include = [
    "data/**/*",
    "src/**/*",
    "examples/**/*",
    "benches/**/*",
    "tests/**/*",
    "Cargo.toml",
    "LICENSE",
    "README.md",
    "build.rs",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Zero-copy vector backed by a byte array"
readme = "README.md"
keywords = [
    "zerocopy",
    "serialization",
    "zero-copy",
    "serde",
]
categories = [
    "rust-patterns",
    "memory-management",
    "caching",
    "no-std",
    "data-structures",
]
license = "Unicode-3.0"
repository = "https://github.com/unicode-org/icu4x"

[package.metadata.workspaces]
independent = true

[package.metadata.docs.rs]
all-features = true

[package.metadata.cargo-all-features]
max_combination_size = 3

[features]
alloc = ["serde?/alloc"]
databake = ["dep:databake"]
derive = ["dep:zerovec-derive"]
hashmap = [
    "dep:twox-hash",
    "alloc",
]
serde = ["dep:serde"]
std = []
yoke = ["dep:yoke"]

[lib]
name = "zerovec"
path = "src/lib.rs"
bench = false

[[example]]
name = "zv_serde"
path = "examples/zv_serde.rs"
required-features = ["serde"]

[[bench]]
name = "vzv"
path = "benches/vzv.rs"
harness = false

[[bench]]
name = "zeromap"
path = "benches/zeromap.rs"
harness = false
required-features = [
    "serde",
    "hashmap",
    "derive",
]

[[bench]]
name = "zerovec"
path = "benches/zerovec.rs"
harness = false

[[bench]]
name = "zerovec_iai"
path = "benches/zerovec_iai.rs"
harness = false

[[bench]]
name = "zerovec_serde"
path = "benches/zerovec_serde.rs"
harness = false
required-features = ["serde"]

[dependencies.databake]
version = "0.2.0"
features = ["derive"]
optional = true
default-features = false

[dependencies.serde]
version = "1.0.220"
features = ["derive"]
optional = true
default-features = false

[dependencies.twox-hash]
version = "2.0.0"
features = ["xxhash64"]
optional = true
default-features = false

[dependencies.yoke]
version = "0.8.0"
optional = true
default-features = false

[dependencies.zerofrom]
version = "0.1.3"
default-features = false

[dependencies.zerovec-derive]
version = "0.11.1"
optional = true
default-features = false

[dev-dependencies.bincode]
version = "1.3.1"

[dev-dependencies.getrandom]
version = "0.3"
features = ["wasm_js"]

[dev-dependencies.iai]
version = "0.1.1"

[dev-dependencies.postcard]
version = "1.0.3"
features = ["use-std"]
default-features = false

[dev-dependencies.rand]
version = "0.9"

[dev-dependencies.rand_distr]
version = "0.5"

[dev-dependencies.rand_pcg]
version = "0.9"

[dev-dependencies.rmp-serde]
version = "1.2.0"

[dev-dependencies.serde]
version = "1.0.220"
features = ["derive"]
default-features = false

[dev-dependencies.serde_json]
version = "1.0.45"

[dev-dependencies.yoke]
version = "0.8.0"
features = ["derive"]
default-features = false

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion]
version = "0.5.0"

Well, that is fine, too! So something has happened during the publication process to strip the dev-dependencies out, even though a naïve cargo publish leaves them in. I surmise that it could be related to the "dirty": true in .cargo_vcs_info.json, which wasn’t present in zerovec-0.11.4.

I have two concerns with these missing dev-dependencies.

  1. They are useful for us in Fedora. We patch some of them out (those for benchmarking, especially) but we do use them to build and run as many of the crates’ tests as possible during the RPM package build process.
  2. If the crates are published from a dirty git tree, what other discrepancies might there be between the contents of this git repository and the published crates?

I haven’t had time to audit a large number of crates yet, but I did observe missing dev-dependencies in zerovec and zerotrie, and I have observed that the expected dev-dependencies are present in yoke, yoke-derive, and zerovec-derive. Both of the crates with missing dev-dependencies have "dirty": true in .cargo_vcs_info.json, and none of the others do.

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions