Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes from https://github.com/ryanpeach/mdlinker/pull/54 #56

Merged
merged 3 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions DEBUGGING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# rust-lldb

https://dev.to/bmatcuk/debugging-rust-with-rust-lldb-j1f

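Run `just test-debug <test_name> <breakpoint>` to run the tests in debug mode using lldb (macOS only). The recipe sets the breakpoint and starts the named test for you.

From the lldb prompt, use `r <test_name>` to run (or re-run) a specific test. https://users.rust-lang.org/t/running-a-single-test-under-a-debugger/44460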
10 changes: 10 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,13 @@ test:

test-print test_name:
RUNNING_TESTS=true RUST_LOG=trace RUST_BACKTRACE=1 cargo test -- --test-threads=1 {{test_name}}

[macos]
test-debug test_name breakpoint:
    #!/bin/bash
    # Launch the logseq integration-test binary under rust-lldb, with a
    # breakpoint set at `breakpoint` and the test `test_name` started.
    #
    # Build the test binaries without running them. cargo reports the
    # "Executable ..." lines on stderr, so keep stderr and discard stdout.
    TEST_OUTPUT=$(RUNNING_TESTS=true cargo test --no-run 2>&1 >/dev/null)
    # Extract the path between the parentheses of the logseq test binary line.
    DEP1=$(echo "$TEST_OUTPUT" | grep -ohe 'Executable tests/logseq/main.rs (target/debug/deps/logseq-[a-z0-9]*' | awk -F'[()]' '{print $2}' | head -n 1)
    # Without a binary path rust-lldb would start with no target; stop here
    # and show cargo's output (usually a build error) instead.
    if [ -z "$DEP1" ]; then
        echo "test-debug: could not find the logseq test binary in cargo's output:" >&2
        echo "$TEST_OUTPUT" >&2
        exit 1
    fi
    echo "$DEP1"
    RUNNING_TESTS=true RUST_LOG=debug RUST_BACKTRACE=full rust-lldb "$DEP1" \
        -o "b {{breakpoint}}" \
        -o "r {{test_name}}"
52 changes: 52 additions & 0 deletions bin/byte_index
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import argparse

def find_byte_indexes(text, search_term):
    """Return the UTF-8 byte offsets of every occurrence of ``search_term``.

    Both arguments are strings; they are encoded to UTF-8 and the search is
    done on the encoded bytes, so the offsets count bytes rather than
    characters. Overlapping occurrences are all reported, in ascending order.
    An empty list means the term does not occur.
    """
    haystack = text.encode('utf-8')
    needle = search_term.encode('utf-8')

    positions = []
    cursor = 0
    while True:
        hit = haystack.find(needle, cursor)
        if hit == -1:
            return positions
        positions.append(hit)
        # Resume one byte past this hit so overlapping matches are found too.
        cursor = hit + 1

def main():
    """Command-line entry point: print the byte offsets of a term in a file.

    Takes two positional arguments, ``file`` and ``search_term``. The file is
    read as UTF-8 and searched with ``find_byte_indexes``; the offsets found
    (or a "not found" message) are printed. If the file cannot be read, an
    error message is printed and the function returns without searching.
    """
    parser = argparse.ArgumentParser(description="Find byte indexes of a search term in a file.")
    parser.add_argument("file", help="Path to the file to be searched")
    parser.add_argument("search_term", help="The term to search for in the file")
    args = parser.parse_args()

    try:
        # newline='' turns off universal-newline translation. With the default,
        # "\r\n" is read as "\n", so after re-encoding every offset past the
        # first CRLF would be smaller than the real byte position in the file.
        with open(args.file, 'r', encoding='utf-8', newline='') as f:
            file_content = f.read()
    except FileNotFoundError:
        print(f"Error: File '{args.file}' not found.")
        return
    except Exception as e:
        # Covers decode errors, permission problems, directories, etc.
        print(f"Error reading file: {e}")
        return

    indexes = find_byte_indexes(file_content, args.search_term)

    if indexes:
        print(f"Found '{args.search_term}' at byte indexes: {indexes}")
    else:
        print(f"'{args.search_term}' not found in the file.")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
36 changes: 18 additions & 18 deletions src/file/content/wikilink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ impl Alias {
self.0.is_empty()
}
#[must_use]
pub fn len(&self) -> usize {
pub fn char_len(&self) -> usize {
self.0.chars().count()
}
}
Expand Down Expand Up @@ -104,26 +104,26 @@ impl Visitor for WikilinkVisitor {
.expect("Otherwise the regex wouldn't match")
.as_str(),
);
let capture_start_byte = captures
.get(1)
.expect("The regex has 2 capture groups")
.start();
let text_without_frontmatter = remove_frontmatter_from_source(source, node);
let sourcepos_start_offset_bytes = SourceOffset::from_location(
text_without_frontmatter,
sourcepos.start.line,
sourcepos.start.column,
)
.offset();
let span = SourceSpan::new(
(sourcepos_start_offset_bytes + capture_start_byte).into(),
alias.char_len(),
);
let span_repaired = repair_span_due_to_frontmatter(span, node);
self.wikilinks.push(
Wikilink::builder()
.alias(alias.clone())
.span(repair_span_due_to_frontmatter(
SourceSpan::new(
(SourceOffset::from_location(
remove_frontmatter_from_source(source, node),
sourcepos.start.line,
sourcepos.start.column,
)
.offset()
+ captures
.get(1)
.expect("The regex has 2 capture groups")
.start())
.into(),
alias.len(),
),
node,
))
.span(span_repaired)
.build(),
);
}
Expand Down
30 changes: 15 additions & 15 deletions src/rules/unlinked_text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,20 @@ impl Visitor for UnlinkedTextVisitor {
continue;
}
let alias = Alias::new(&patterns[found.pattern().as_usize()]);
let span = repair_span_due_to_frontmatter(
SourceSpan::new(
(SourceOffset::from_location(
remove_frontmatter_from_source(source, node),
sourcepos.start.line,
sourcepos.start.column,
)
.offset()
+ found.start())
.into(),
found.end() - found.start(),
),
node,
);
if "lorem" == alias.to_string() {
println!("Found lorem");
}
let text_without_frontmatter = remove_frontmatter_from_source(source, node);
let sourcepos_start_offset_bytes = SourceOffset::from_location(
text_without_frontmatter,
sourcepos.start.line,
sourcepos.start.column,
)
.offset();
let byte_length = found.end() - found.start();
let offset_bytes = sourcepos_start_offset_bytes + found.start();
let span = SourceSpan::new(offset_bytes.into(), byte_length);
let span_repaired = repair_span_due_to_frontmatter(span, node);

// Don't match inside wikilinks
if let Some(parent) = parent {
Expand All @@ -204,7 +204,7 @@ impl Visitor for UnlinkedTextVisitor {
}
}

self.new_unlinked_texts.push((alias, span));
self.new_unlinked_texts.push((alias, span_repaired));
}
}
Ok(())
Expand Down
14 changes: 14 additions & 0 deletions src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ pub enum ParseError {
#[backtrace]
source: std::io::Error,
},
#[error("Multibyte characters found in the file {file:?}")]
MultibyteError {
file: PathBuf,
backtrace: backtrace::Backtrace,
},
#[error("Error parsing the source code for file {file:?} using tree-sitter")]
TreeSitter {
file: PathBuf,
Expand All @@ -119,6 +124,15 @@ pub fn parse(path: &PathBuf, visitors: Vec<Rc<RefCell<dyn Visitor>>>) -> Result<
file: path.clone(),
source,
})?;

// Check for multibyte characters
if source.chars().count() != source.len() {
return Err(ParseError::MultibyteError {
file: path.clone(),
backtrace: backtrace::Backtrace::force_capture(),
});
}

// Parse the source code
let arena = Arena::new();
let options = ExtensionOptionsBuilder::default()
Expand Down
8 changes: 4 additions & 4 deletions tests/logseq/unlinked_text/assets/journals/2024_08_10.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
- [[Tvtinl]]
- Axndco/Pbyudhm
- Zqxzn Avdxyxofum
- Zqxzn Avdxyxofu'm
- Vgrjsat
- Foueh eypvsd?
- Xotw wlhcgrryrj
- Cbvn ocjrla
- Siuh lnmcnlv
- Jztvzm 21 pdyhwjzvnx
- Hfxeno aq tmnz d lecc ymdeuz
- Lh cy b brkbkgmtz frg ksocv gomu yh mvrfw ysnt doeun bzim, eka aclmy vrpa hys oz tuk mpb nlle lyn icazyvey ik my peub xr
- Lh cy b brkbkgmtz frg ksocv gomu yh mvr'fw ysnt doeun bzim, eka aclmy vrpa hys oz tuk mpb nlle lyn icazyvey ik my peub xr
- [[Mek Xvmr]]
- Usp tcr itlo cb zsez hjmrou ler f pdv
- "Usp tcr itlo cb zsez hjmrou ler f pdv"
- Pav bvqv egx ic jxa rto dd amr nwd. Hwt sq vabt kyzk hhx yp, tb j qyt cebfd:
- Jjyyvwrro xkbj acor uzifhktovnah hfbyv, udh hluq hvv jx uouao whnp ik wxagb knyiyds
- Jjyyvwrro xkbj acor uzifhktovnah hfbyv, udh hluq hvv jx uoua'o whnp ik wxagb knyiyds
Loading