From a4c3d066f399f3a4e5da8aca6e5849693cc2776e Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Thu, 7 Dec 2023 21:58:02 +0000 Subject: [PATCH] remove julia prompt from code blocks --- src/code_generation.jl | 36 +++++++++++++++++++++++++++++++-- test/code_generation.jl | 44 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/code_generation.jl b/src/code_generation.jl index 3f2e81196..81f5f6cda 100644 --- a/src/code_generation.jl +++ b/src/code_generation.jl @@ -163,6 +163,38 @@ function detect_missing_packages(imports_required::AbstractVector{<:Symbol}) end end +"Checks if a given string has a Julia prompt (`julia> `) at the beginning of a line." +has_julia_prompt(s::T) where {T <: AbstractString} = occursin(r"^julia> "m, s) + +""" + remove_julia_prompt(s::T) where {T<:AbstractString} + +If it detects a julia prompt, it removes it and all lines that do not have it (except for those that belong to the code block). +""" +function remove_julia_prompt(s::T) where {T <: AbstractString} + if !has_julia_prompt(s) + return s + end + # Has julia prompt, so we need to parse it line by line + lines = split(s, '\n') + code_line = false + io = IOBuffer() + for line in lines + if startswith(line, r"^julia> ") + code_line = true + # remove the prompt + println(io, replace(line, "julia> " => "")) + elseif code_line && startswith(line, r"^ ") + # continuation of the code line + println(io, line) + else + code_line = false + end + end + # strip removes training whitespace and newlines + String(take!(io)) |> strip +end + """ extract_code_blocks(markdown_content::String) -> Vector{String} @@ -215,8 +247,8 @@ function extract_code_blocks(markdown_content::AbstractString) # Find all matches and extract the code matches = eachmatch(pattern, markdown_content) - # Extract and clean the code blocks - code_blocks = String[m.captures[1] for m in matches] + # Extract and clean the code blocks (remove the julia prompt) + code_blocks = String[remove_julia_prompt(m.captures[1]) for m in matches] return code_blocks end diff --git a/test/code_generation.jl b/test/code_generation.jl index e80ca1fbb..9db6ca8a4 100644 --- a/test/code_generation.jl +++ b/test/code_generation.jl @@ -1,6 +1,6 @@ using PromptingTools: extract_julia_imports using PromptingTools: detect_pkg_operation, detect_missing_packages, extract_function_name -using PromptingTools: extract_code_blocks, eval! +using PromptingTools: has_julia_prompt, remove_julia_prompt, extract_code_blocks, eval! @testset "extract_imports tests" begin @test extract_julia_imports("using Test, LinearAlgebra") == @@ -29,6 +29,48 @@ end @test detect_pkg_operation("import Pkg;") == false end +@testset "has_julia_prompt" begin + @test has_julia_prompt("julia> a=1") + @test has_julia_prompt(""" +# something else first +julia> a=1 +""") + @test !has_julia_prompt(""" + # something + # new + a=1 + """) +end + +@testset "remove_julia_prompt" begin + @test remove_julia_prompt("julia> a=1") == "a=1" + @test remove_julia_prompt(""" +# something else first +julia> a=1 +# output +""") == "a=1" + @test remove_julia_prompt(""" + # something + # new + a=1 + """) == """ + # something + # new + a=1 + """ + @test remove_julia_prompt(""" +julia> a=\"\"\" + hey + there + \"\"\" +"hey\nthere\n" + """) == """ +a=\"\"\" + hey + there + \"\"\"""" +end + @testset "extract_code_blocks" begin # Single Julia Code Block markdown_content = """