Skip to content

Commit

Permalink
Auto merge of rust-lang#12280 - weihanglo:git-ref-ambiguity, r=ehuss
Browse files Browse the repository at this point in the history
fix: encode URL params correctly for SourceId in Cargo.lock

We use [`form_urlencoded::byte_serialize`](https://docs.rs/form_urlencoded/1.2.0/form_urlencoded/fn.byte_serialize.html), which is re-exported by `url` crate. Tests are copied from <rust-lang#11086>. Kudos to the original author!
  • Loading branch information
bors committed Jul 23, 2023
2 parents af8643c + 273285f commit 7c82ff2
Show file tree
Hide file tree
Showing 6 changed files with 363 additions and 21 deletions.
104 changes: 93 additions & 11 deletions src/cargo/core/resolver/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,13 @@ impl EncodableResolve {
let enc_id = EncodablePackageId {
name: pkg.name.clone(),
version: Some(pkg.version.clone()),
source: pkg.source,
source: pkg.source.clone(),
};

if !all_pkgs.insert(enc_id.clone()) {
anyhow::bail!("package `{}` is specified twice in the lockfile", pkg.name);
}
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
// We failed to find a local package in the workspace.
// It must have been removed and should be ignored.
None => {
Expand Down Expand Up @@ -366,7 +366,7 @@ impl EncodableResolve {

let mut unused_patches = Vec::new();
for pkg in self.patch.unused {
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
Some(&src) => PackageId::new(&pkg.name, &pkg.version, src)?,
None => continue,
};
Expand Down Expand Up @@ -488,17 +488,95 @@ impl Patch {
pub struct EncodableDependency {
name: String,
version: String,
source: Option<SourceId>,
source: Option<EncodableSourceId>,
checksum: Option<String>,
dependencies: Option<Vec<EncodablePackageId>>,
replace: Option<EncodablePackageId>,
}

/// Pretty much equivalent to [`SourceId`] with a different serialization method.
///
/// The serialization for `SourceId` doesn't do URL encode for parameters.
/// In contrast, this type is aware of that whenever [`ResolveVersion`] allows
/// us to do so (v4 or later).
///
/// [`EncodableResolve`] turns into a `
#[derive(Deserialize, Debug, PartialOrd, Ord, Clone)]
#[serde(transparent)]
pub struct EncodableSourceId {
inner: SourceId,
/// We don't care about the deserialization of this, as the `url` crate
/// will always decode as the URL was encoded. Only when a [`Resolve`]
/// turns into a [`EncodableResolve`] will it set the value accordingly
/// via [`encodable_source_id`].
#[serde(skip)]
encoded: bool,
}

impl EncodableSourceId {
/// Creates a `EncodableSourceId` that always encodes URL params.
fn new(inner: SourceId) -> Self {
Self {
inner,
encoded: true,
}
}

/// Creates a `EncodableSourceId` that doesn't encode URL params. This is
/// for backward compatibility for order lockfile version.
fn without_url_encoded(inner: SourceId) -> Self {
Self {
inner,
encoded: false,
}
}

/// Encodes the inner [`SourceId`] as a URL.
fn as_url(&self) -> impl fmt::Display + '_ {
if self.encoded {
self.inner.as_encoded_url()
} else {
self.inner.as_url()
}
}
}

impl std::ops::Deref for EncodableSourceId {
type Target = SourceId;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

impl ser::Serialize for EncodableSourceId {
fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
where
S: ser::Serializer,
{
s.collect_str(&self.as_url())
}
}

impl std::hash::Hash for EncodableSourceId {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.inner.hash(state)
}
}

impl std::cmp::PartialEq for EncodableSourceId {
fn eq(&self, other: &Self) -> bool {
self.inner == other.inner
}
}

impl std::cmp::Eq for EncodableSourceId {}

#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone)]
pub struct EncodablePackageId {
name: String,
version: Option<String>,
source: Option<SourceId>,
source: Option<EncodableSourceId>,
}

impl fmt::Display for EncodablePackageId {
Expand Down Expand Up @@ -535,7 +613,8 @@ impl FromStr for EncodablePackageId {
Ok(EncodablePackageId {
name: name.to_string(),
version: version.map(|v| v.to_string()),
source: source_id,
// Default to url encoded.
source: source_id.map(EncodableSourceId::new),
})
}
}
Expand Down Expand Up @@ -603,7 +682,7 @@ impl ser::Serialize for Resolve {
.map(|id| EncodableDependency {
name: id.name().to_string(),
version: id.version().to_string(),
source: encode_source(id.source_id()),
source: encodable_source_id(id.source_id(), self.version()),
dependencies: None,
replace: None,
checksum: if self.version() >= ResolveVersion::V2 {
Expand Down Expand Up @@ -676,7 +755,7 @@ fn encodable_resolve_node(
EncodableDependency {
name: id.name().to_string(),
version: id.version().to_string(),
source: encode_source(id.source_id()),
source: encodable_source_id(id.source_id(), resolve.version()),
dependencies: deps,
replace,
checksum: if resolve.version() >= ResolveVersion::V2 {
Expand All @@ -702,7 +781,7 @@ pub fn encodable_package_id(
}
}
}
let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None));
let mut source = encodable_source_id(id_to_encode.with_precise(None), resolve_version);
if let Some(counts) = &state.counts {
let version_counts = &counts[&id.name()];
if version_counts[&id.version()] == 1 {
Expand All @@ -719,10 +798,13 @@ pub fn encodable_package_id(
}
}

fn encode_source(id: SourceId) -> Option<SourceId> {
fn encodable_source_id(id: SourceId, version: ResolveVersion) -> Option<EncodableSourceId> {
if id.is_path() {
None
} else {
Some(id)
Some(match version {
ResolveVersion::V4 => EncodableSourceId::new(id),
_ => EncodableSourceId::without_url_encoded(id),
})
}
}
2 changes: 2 additions & 0 deletions src/cargo/core/resolver/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ pub enum ResolveVersion {
/// Unstable. Will collect a certain amount of changes and then go.
///
/// Changes made:
///
/// * SourceId URL serialization is aware of URL encoding.
V4,
}

Expand Down
72 changes: 64 additions & 8 deletions src/cargo/core/source/source_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ impl SourceId {
pub fn as_url(&self) -> SourceIdAsUrl<'_> {
SourceIdAsUrl {
inner: &*self.inner,
encoded: false,
}
}

/// Like [`Self::as_url`] but with URL parameters encoded.
pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
SourceIdAsUrl {
inner: &*self.inner,
encoded: true,
}
}

Expand Down Expand Up @@ -566,7 +575,10 @@ impl fmt::Display for SourceId {
// Don't replace the URL display for git references,
// because those are kind of expected to be URLs.
write!(f, "{}", self.inner.url)?;
if let Some(pretty) = reference.pretty_ref() {
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
if let Some(pretty) = reference.pretty_ref(false) {
write!(f, "?{}", pretty)?;
}

Expand Down Expand Up @@ -714,6 +726,7 @@ impl Ord for SourceKind {
/// A `Display`able view into a `SourceId` that will write it as a url
pub struct SourceIdAsUrl<'a> {
inner: &'a SourceIdInner,
encoded: bool,
}

impl<'a> fmt::Display for SourceIdAsUrl<'a> {
Expand All @@ -731,7 +744,7 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
..
} => {
write!(f, "git+{}", url)?;
if let Some(pretty) = reference.pretty_ref() {
if let Some(pretty) = reference.pretty_ref(self.encoded) {
write!(f, "?{}", pretty)?;
}
if let Some(precise) = precise.as_ref() {
Expand Down Expand Up @@ -771,27 +784,49 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
impl GitReference {
/// Returns a `Display`able view of this git reference, or None if using
/// the head of the default branch
pub fn pretty_ref(&self) -> Option<PrettyRef<'_>> {
pub fn pretty_ref(&self, url_encoded: bool) -> Option<PrettyRef<'_>> {
match self {
GitReference::DefaultBranch => None,
_ => Some(PrettyRef { inner: self }),
_ => Some(PrettyRef {
inner: self,
url_encoded,
}),
}
}
}

/// A git reference that can be `Display`ed
pub struct PrettyRef<'a> {
inner: &'a GitReference,
url_encoded: bool,
}

impl<'a> fmt::Display for PrettyRef<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self.inner {
GitReference::Branch(ref b) => write!(f, "branch={}", b),
GitReference::Tag(ref s) => write!(f, "tag={}", s),
GitReference::Rev(ref s) => write!(f, "rev={}", s),
let value: &str;
match self.inner {
GitReference::Branch(s) => {
write!(f, "branch=")?;
value = s;
}
GitReference::Tag(s) => {
write!(f, "tag=")?;
value = s;
}
GitReference::Rev(s) => {
write!(f, "rev=")?;
value = s;
}
GitReference::DefaultBranch => unreachable!(),
}
if self.url_encoded {
for value in url::form_urlencoded::byte_serialize(value.as_bytes()) {
write!(f, "{value}")?;
}
} else {
write!(f, "{value}")?;
}
Ok(())
}
}

Expand Down Expand Up @@ -905,6 +940,27 @@ mod tests {
assert_eq!(formatted, "sparse+https://my-crates.io/");
assert_eq!(source_id, deserialized);
}

#[test]
fn gitrefs_roundtrip() {
let base = "https://host/path".into_url().unwrap();
let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
let s1 = SourceId::for_git(&base, branch).unwrap();
let ser1 = format!("{}", s1.as_encoded_url());
let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
let ser2 = format!("{}", s2.as_encoded_url());
// Serializing twice should yield the same result
assert_eq!(ser1, ser2, "Serialized forms don't match");
// SourceId serializing the same should have the same semantics
// This used to not be the case (# was ambiguous)
assert_eq!(s1, s2, "SourceId doesn't round-trip");
// Freeze the format to match an x-www-form-urlencoded query string
// https://url.spec.whatwg.org/#application/x-www-form-urlencoded
assert_eq!(
ser1,
"git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
);
}
}

/// Check if `url` equals to the overridden crates.io URL.
Expand Down
5 changes: 4 additions & 1 deletion src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ impl<'cfg> Debug for GitSource<'cfg> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "git repo at {}", self.remote.url())?;

match self.manifest_reference.pretty_ref() {
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
match self.manifest_reference.pretty_ref(false) {
Some(s) => write!(f, " ({})", s),
None => Ok(()),
}
Expand Down
6 changes: 5 additions & 1 deletion src/cargo/util/toml_mut/dependency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,11 @@ impl GitSource {
impl std::fmt::Display for GitSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let git_ref = self.git_ref();
if let Some(pretty_ref) = git_ref.pretty_ref() {

// TODO(-Znext-lockfile-bump): set it to true when stabilizing
// lockfile v4, because we want Source ID serialization to be
// consistent with lockfile.
if let Some(pretty_ref) = git_ref.pretty_ref(false) {
write!(f, "{}?{}", self.git, pretty_ref)
} else {
write!(f, "{}", self.git)
Expand Down
Loading

0 comments on commit 7c82ff2

Please sign in to comment.