Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Differentiate between an object that has a literal * field, and a pointer to an object's additionalProperties shape. #1200

Merged
merged 5 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 34 additions & 24 deletions crates/doc/src/ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ pub enum Token {
Index(usize),
/// JSON object property name without escaping. Never an integer.
Property(String),
// Represents the concept of "the next property" to be added
NextProperty,
/// Next JSON index which is one beyond the current array extent.
/// If applied to a JSON object, the property literal "-" is used.
NextIndex,
}

impl Token {
pub fn from_str(s: &str) -> Self {
if s == "-" {
Token::NextIndex
} else if s.starts_with('+') || (s.starts_with('0') && s.len() > 1) {
if s.starts_with('+') || (s.starts_with('0') && s.len() > 1) {
Token::Property(s.to_string())
} else if let Ok(ind) = usize::from_str(&s) {
Token::Index(ind)
Expand All @@ -33,6 +33,7 @@ impl<'t> std::fmt::Display for Token {
match self {
Token::Index(ind) => write!(f, "{ind}"),
Token::Property(prop) => write!(f, "{prop}"),
Token::NextProperty => write!(f, "*"),
Token::NextIndex => write!(f, "-"),
}
}
Expand All @@ -53,12 +54,11 @@ impl Pointer {
/// ```
/// use doc::ptr::{Pointer, Token};
///
/// let pointer = Pointer::from_str("/foo/ba~1ar/3/-");
/// let pointer = Pointer::from_str("/foo/ba~1ar/3");
/// let expected_tokens = vec![
/// Token::Property("foo".to_string()),
/// Token::Property("ba/ar".to_string()),
/// Token::Index(3),
/// Token::NextIndex,
/// ];
/// assert_eq!(expected_tokens, pointer.0);
/// ```
Expand Down Expand Up @@ -116,6 +116,9 @@ impl Pointer {
json::Location::EndOfArray(_) => {
ptr.push(Token::NextIndex);
}
json::Location::NextProperty(_) => {
ptr.push(Token::NextProperty);
}
}
ptr
})
Expand All @@ -140,12 +143,12 @@ impl Pointer {
Node::Object(fields) => match token {
Token::Index(ind) => fields.get(&ind.to_string()),
Token::Property(property) => fields.get(&property),
Token::NextIndex => fields.get("-"),
Token::NextProperty | Token::NextIndex => None,
}
.map(|field| field.value()),
Node::Array(arr) => match token {
Token::Index(ind) => arr.get(*ind),
Token::Property(_) | Token::NextIndex => None,
Token::Property(_) | Token::NextIndex | Token::NextProperty => None,
},
_ => None,
};
Expand Down Expand Up @@ -181,7 +184,7 @@ impl Pointer {
// which we'll create the next child location.
if let Value::Null = v {
match token {
Token::Property(_) => {
Token::Property(_) | Token::NextProperty => {
*v = Value::Object(serde_json::map::Map::new());
}
Token::Index(_) | Token::NextIndex => {
Expand All @@ -195,7 +198,7 @@ impl Pointer {
// Create or modify existing entry.
Token::Index(ind) => map.entry(ind.to_string()).or_insert(Value::Null),
Token::Property(prop) => map.entry(prop).or_insert(Value::Null),
Token::NextIndex => map.entry("-").or_insert(Value::Null),
Token::NextProperty | Token::NextIndex => return None,
},
Value::Array(arr) => match token {
Token::Index(ind) => {
Expand All @@ -212,7 +215,7 @@ impl Pointer {
arr.last_mut().unwrap()
}
// Cannot match (attempt to query property of an array).
Token::Property(_) => return None,
Token::Property(_) | Token::NextProperty => return None,
},
Value::Number(_) | Value::Bool(_) | Value::String(_) => {
return None; // Cannot match (attempt to take child of scalar).
Expand All @@ -239,9 +242,10 @@ impl Pointer {
Token::Property(_) => {
*v = HeapNode::Object(BumpVec::new());
}
Token::Index(_) | Token::NextIndex => {
Token::Index(_) => {
*v = HeapNode::Array(BumpVec::new());
}
Token::NextProperty | Token::NextIndex => return None,
};
}

Expand All @@ -250,7 +254,7 @@ impl Pointer {
// Create or modify existing entry.
Token::Index(ind) => fields.insert_property(&ind.to_string(), alloc),
Token::Property(property) => fields.insert_property(property, alloc),
Token::NextIndex => fields.insert_property("-", alloc),
Token::NextProperty | Token::NextIndex => return None,
},
HeapNode::Array(arr) => match token {
Token::Index(ind) => {
Expand All @@ -270,7 +274,7 @@ impl Pointer {
arr.last_mut().unwrap()
}
// Cannot match (attempt to query property of an array).
Token::Property(_) => return None,
Token::Property(_) | Token::NextProperty => return None,
},
HeapNode::Bool(_)
| HeapNode::Bytes(_)
Expand Down Expand Up @@ -333,6 +337,7 @@ impl std::fmt::Display for Pointer {
write!(f, "/")?;
match item {
Token::NextIndex => write!(f, "-")?,
Token::NextProperty => write!(f, "*")?,
Token::Property(p) => write!(f, "{}", replace_escapes(p))?,
Token::Index(ind) => write!(f, "{}", ind)?,
};
Expand All @@ -354,12 +359,11 @@ mod test {
use Token::*;

// Basic example.
let ptr = Pointer::from("/p1/2/p3/-");
let ptr = Pointer::from("/p1/2/p3");
assert!(vec![
Property("p1".to_string()),
Index(2),
Property("p3".to_string()),
NextIndex
Property("p3".to_string())
]
.iter()
.eq(ptr.iter()));
Expand All @@ -384,13 +388,12 @@ mod test {
);

// Handles disallowed integer representations.
let ptr = Pointer::from("/01/+2/-3/4/-");
let ptr = Pointer::from("/01/+2/-3/4");
assert!(vec![
Property("01".to_string()),
Property("+2".to_string()),
Property("-3".to_string()),
Index(4),
NextIndex,
Index(4)
]
.iter()
.eq(ptr.iter()));
Expand Down Expand Up @@ -482,10 +485,9 @@ mod test {
("/foo/2/a", json!("hello")),
// Add property to existing object.
("/foo/2/b", json!(3)),
("/foo/0", json!(false)), // Update existing Null.
("/bar", json!(null)), // Add property to doc root.
("/foo/0", json!(true)), // Update from 'false'.
("/foo/-", json!("world")), // NextIndex extends Array.
("/foo/0", json!(false)), // Update existing Null.
("/bar", json!(null)), // Add property to doc root.
("/foo/0", json!(true)), // Update from 'false'.
// Index token is interpreted as property because object exists.
("/foo/2/4", json!(5)),
// NextIndex token is also interpreted as property.
Expand All @@ -502,7 +504,7 @@ mod test {
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add back the /foo/- case here by manually building up the tokens? You can't implicitly build them by parsing a pointer as it used to do, but you can still build it manually.

Also add a case covering creation with NextProperty. It'd return None, right?

Copy link
Contributor Author

@jshearer jshearer Oct 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got totally side-tracked and forgot to finished this. What do you mean by adding /foo/- back? That pointer wouldn't make any sense even if I did create it by hand: /foo is an array, and - is interpreted as an object field called -, which you can't use to index into an array array.

Also add a case covering creation with NextProperty. It'd return None, right?

Done

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant build it up from tokens, rather than parsing, so that it's the NextIndex variant.

It's possible to build such a pointer and pass it to create, right? Let's cover that in the test.

let expect = json!({
"foo": [true, null, {"-": false, "a": "hello", "b": 3, "4": 5}, "world"],
"foo": [true, null, {"-": false, "a": "hello", "b": 3, "4": 5}],
"bar": null,
});

Expand All @@ -513,6 +515,7 @@ mod test {
for case in [
"/foo/2/a/3", // Attempt to index string scalar.
"/foo/bar", // Attempt to take property of array.
"/foo/-", // Attempt to take property of array
]
.iter()
{
Expand All @@ -521,6 +524,13 @@ mod test {
assert!(ptr.create_value(&mut root_value).is_none());
assert!(ptr.create_heap_node(&mut root_heap_doc, &alloc).is_none());
}

let next_index_ptr = Pointer::from_iter(
vec![Token::Property("foo".to_string()), Token::NextProperty].into_iter(),
);

let res = next_index_ptr.create_value(&mut root_value);
assert_eq!(res, None);
}

#[test]
Expand Down
41 changes: 33 additions & 8 deletions crates/doc/src/shape/limits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ fn squash_location_inner(shape: &mut Shape, name: &Token) {
match name {
// Squashing of `additional*` fields is not possible here because we don't
// have access to the parent shape
Token::NextIndex => unreachable!(),
Token::Property(prop) if prop == "*" => unreachable!(),

Token::NextIndex | Token::NextProperty => unreachable!(),
Token::Index(_) => {
// Pop the last element from the array tuple shape to avoid
// shifting the indexes of any other tuple shapes
Expand Down Expand Up @@ -99,15 +97,13 @@ fn squash_location(shape: &mut Shape, location: &[Token]) {
match location {
[] => unreachable!(),
[Token::NextIndex] => unreachable!(),
[Token::Property(prop_name)] if prop_name == "*" => unreachable!(),
[Token::NextProperty] => unreachable!(),

[first] => squash_location_inner(shape, first),
[first, more @ ..] => {
let inner = match first {
Token::NextIndex => shape.array.additional_items.as_deref_mut(),
Token::Property(prop_name) if prop_name == "*" => {
shape.object.additional_properties.as_deref_mut()
}
Token::NextProperty => shape.object.additional_properties.as_deref_mut(),
Token::Index(idx) => shape.array.tuple.get_mut(*idx),
Token::Property(prop_name) => shape
.object
Expand Down Expand Up @@ -136,7 +132,7 @@ pub fn enforce_shape_complexity_limit(shape: &mut Shape, limit: usize) {
// but we don't want to include those locations that are leaf nodes, since
// leaf node recursion is squashed every time we squash a concrete property.
[.., Token::NextIndex] => None,
[.., Token::Property(prop_name)] if prop_name == "*" => None,
[.., Token::NextProperty] => None,
[] => None,
_ => Some(ptr),
})
Expand Down Expand Up @@ -453,4 +449,33 @@ mod test {
Some(0),
);
}

#[test]
fn test_quickcheck_regression_3() {
widening_snapshot_helper(
Some(
r#"
type: object
required:
- ""
- "*"
properties:
"":
type: "null"
"*":
type: string
maxLength: 0
additionalProperties: false
"#,
),
r#"
type: object
additionalProperties:
type: ["null", "string"]
maxLength: 0
"#,
&[],
Some(0),
);
}
}
14 changes: 12 additions & 2 deletions crates/doc/src/shape/location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,15 @@ impl Shape {
Exists::Cannot,
),

Token::NextProperty if self.type_.overlaps(types::OBJECT) => (
self.object
.additional_properties
.as_ref()
.map(AsRef::as_ref)
.unwrap_or(&SENTINEL_SHAPE),
Exists::Cannot,
),

Token::Property(property) if self.type_.overlaps(types::OBJECT) => {
self.obj_property_location(property)
}
Expand All @@ -134,6 +143,7 @@ impl Shape {
Token::Index(_) => (&SENTINEL_SHAPE, Exists::Cannot),
Token::NextIndex => (&SENTINEL_SHAPE, Exists::Cannot),
Token::Property(_) => (&SENTINEL_SHAPE, Exists::Cannot),
Token::NextProperty => (&SENTINEL_SHAPE, Exists::Cannot),
}
}

Expand Down Expand Up @@ -211,7 +221,7 @@ impl Shape {

if let Some(child) = &self.object.additional_properties {
child.locations_inner(
location.push_prop("*"),
location.push_next_property(),
exists.extend(Exists::May),
true,
out,
Expand Down Expand Up @@ -330,7 +340,7 @@ mod test {
(&arr1, "/3", ("addl-item", Exists::May)),
(&arr1, "/9", ("addl-item", Exists::May)),
(&arr1, "/10", ("addl-item", Exists::Cannot)),
(&arr1, "/-", ("addl-item", Exists::Cannot)),
(&arr1, "/-", ("<missing>", Exists::Cannot)),
(&arr2, "/0", ("0", Exists::May)),
(&arr2, "/1", ("1", Exists::May)),
(&arr2, "/123", ("<missing>", Exists::Implicit)),
Expand Down
3 changes: 1 addition & 2 deletions crates/doc/src/shape/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,10 @@ fn shape_from(schema_yaml: &str) -> Shape {

#[cfg(test)]
mod test {
use super::{ArrayShape, ObjShape, Shape, StringShape};

#[test]
#[cfg(target_arch = "x86_64")]
fn shape_size_regression() {
use super::{ArrayShape, ObjShape, Shape, StringShape};
assert_eq!(std::mem::size_of::<ObjShape>(), 56);
assert_eq!(std::mem::size_of::<StringShape>(), 48);
assert_eq!(std::mem::size_of::<ArrayShape>(), 48);
Expand Down
11 changes: 11 additions & 0 deletions crates/json/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pub enum Location<'a> {
Property(LocatedProperty<'a>),
Item(LocatedItem<'a>),
EndOfArray(&'a Location<'a>),
NextProperty(&'a Location<'a>),
}

impl<'a> Location<'a> {
Expand Down Expand Up @@ -87,6 +88,11 @@ impl<'a> Location<'a> {
Location::EndOfArray(self)
}

// Returns a new Location that extends this one with a pointer to the object's additionalProperties ("*").
pub fn push_next_property(&'a self) -> Location<'a> {
Location::NextProperty(self)
}

/// Returns a struct that implements `std::fmt::Display` to provide a string representation of
/// the location as a JSON pointer that does no escaping besides '~' and '/'.
pub fn pointer_str(&self) -> PointerStr {
Expand Down Expand Up @@ -128,6 +134,9 @@ impl<'a> Location<'a> {
Location::EndOfArray(parent) => {
acc = parent.fold_inner(acc, fun);
}
Location::NextProperty(parent) => {
acc = parent.fold_inner(acc, fun);
}
}
fun(*self, acc)
}
Expand Down Expand Up @@ -181,6 +190,7 @@ impl<'a> fmt::Display for PointerStr<'a> {
}
Location::Item(LocatedItem { index, .. }) => write!(f, "/{}", index),
Location::EndOfArray(_) => write!(f, "/-"),
Location::NextProperty(_) => write!(f, "/*"),
})
})
}
Expand Down Expand Up @@ -209,6 +219,7 @@ impl<'a> fmt::Display for UrlEscaped<'a> {
}
Location::Item(LocatedItem { index, .. }) => write!(f, "/{}", index),
Location::EndOfArray(_) => write!(f, "/-"),
Location::NextProperty(_) => write!(f, "/*"),
})
})
}
Expand Down
1 change: 1 addition & 0 deletions crates/parser/src/decorate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub fn display_ptr(ptr: &Pointer) -> String {
Token::Property(p) => buf.push_str(p),
Token::Index(i) => write!(&mut buf, "{}", i).unwrap(),
Token::NextIndex => buf.push('-'),
Token::NextProperty => buf.push('*'),
}
}
buf
Expand Down
Loading