Skip to content

Commit

Permalink
Add more Julia templates
Browse files Browse the repository at this point in the history
Add templates
  • Loading branch information
svilupp authored Dec 13, 2023
2 parents 9f369f1 + 69f5622 commit 7004044
Show file tree
Hide file tree
Showing 7 changed files with 301 additions and 10 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Improved AICode parsing and error handling (eg, capture more REPL prompts, detect parsing errors earlier), including the option to remove unsafe code (eg, `Pkg.add("SomePkg")`) with `AICode(msg; skip_unsafe=true, verbose=true)`
- Improved AICode parsing and error handling (eg, capture more REPL prompts, detect parsing errors earlier, parse more code fence types), including the option to remove unsafe code (eg, `Pkg.add("SomePkg")`) with `AICode(msg; skip_unsafe=true, verbose=true)`
- Added new prompt templates: `JuliaRecapTask`, `JuliaRecapCoTTask`, `JuliaExpertTestCode` and updated `JuliaExpertCoTTask` to be more robust against early stopping for smaller OSS models

### Fixed

Expand Down
131 changes: 128 additions & 3 deletions src/code_generation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,79 @@ function isparsed(cb::AICode)
return isparsed(cb.expression) && !isparseerror(cb.error)
end

## Parsing Helpers
# Expression heads treated as "hallmarks" of Julia code by `is_julia_expr`.
# Declared `const` so that functions reading this global stay type-stable
# (non-const globals are inferred as `Any`). Each head is listed exactly once.
const JULIA_EXPR_HEADS = [
    :block,
    :quote,
    :call,
    :macrocall,
    :(=),
    :function,
    :for,
    :if,
    :while,
    :let,
    :try,
    :catch,
    :finally,
    :method,
    :tuple,
    :array,
    :index,
    :ref,
    :.,
    :do,
    :curly,
    :typed_vcat,
    :typed_hcat,
    :comprehension,
    :generator,
    :kw,
    :where,
]
# Naive heuristic: does `ex` carry any hallmark of Julia code?
# Used as a cheap filter so that output cells (```plaintext ... ```) are not eval'ed.
# Anything that is not an `Expr` is never considered Julia code.
is_julia_expr(ex::Any) = false
function is_julia_expr(ex::Expr)
    # An expression "looks like Julia" when its head is one of the known heads
    has_known_head(node) = Meta.isexpr(node, JULIA_EXPR_HEADS)
    # Inspect the expression itself first, then its direct arguments (one level deep)
    return has_known_head(ex) || any(has_known_head, ex.args)
end

## Heuristic check whether `code` looks like valid Julia source.
## Returns `true` only if the text parses and the parsed result passes `is_julia_expr`.
function is_julia_code(code::AbstractString)
    parsed = try
        Meta.parseall(code)
    catch
        # The parser threw, so this cannot be treated as Julia code
        return false
    end

    return isparsed(parsed) && is_julia_expr(parsed)
end

## Overload for AIMessage - extracts the code blocks from `msg.content` and concatenates them.
## - If no code blocks are found by `extract_code_blocks`, falls back to
##   `extract_code_blocks_fallback` and keeps only the blobs accepted by `is_julia_code`.
## - `skip_unsafe=true` passes the joined code through `remove_unsafe_lines` (`verbose` is forwarded to it).
## - Any remaining `kwargs` are forwarded to `AICode(code; kwargs...)`.
function AICode(msg::AIMessage;
        verbose::Bool = false,
        skip_unsafe::Bool = false,
        kwargs...)
    # Extract once; keep the vector of blocks separate from the joined string
    # so that no variable changes its type along the way.
    blocks = extract_code_blocks(msg.content)
    if isempty(blocks)
        ## Fallback option for generic code fence, we must check if the content is parseable
        blocks = filter(is_julia_code, extract_code_blocks_fallback(msg.content))
    end
    code = join(blocks, "\n")
    skip_unsafe && (code = remove_unsafe_lines(code; verbose))
    return AICode(code; kwargs...)
end
Expand All @@ -176,8 +243,10 @@ function extract_julia_imports(input::AbstractString)
subparts = map(x -> contains(x, ':') ? split(x, ':')[1] : x,
split(subparts, ","))
subparts = replace(join(subparts, ' '), ',' => ' ')
packages = filter(!isempty, split(subparts, " ")) .|> Symbol
append!(package_names, packages)
packages = filter(x -> !isempty(x) && !startswith(x, "Base") &&
!startswith(x, "Main"),
split(subparts, " "))
append!(package_names, Symbol.(packages))
end
end
return package_names
Expand Down Expand Up @@ -303,6 +372,8 @@ The extracted code blocks are returned as a vector of strings, with each string
Note: Only the content within the code fences is extracted, and the code fences themselves are not included in the output.
See also: `extract_code_blocks_fallback`
# Arguments
- `markdown_content::String`: A string containing the markdown content from which Julia code blocks are to be extracted.
Expand Down Expand Up @@ -379,6 +450,60 @@ function extract_code_blocks(markdown_content::T) where {T <: AbstractString}
return reverse(code_blocks) # Reverse to maintain original order
end

"""
    extract_code_blocks_fallback(markdown_content::String, delim::AbstractString="```")

Extract Julia code blocks from a markdown string using a fallback method (splitting by arbitrary `delim`-iters).
Much more simplistic than `extract_code_blocks` and does not support nested code blocks.

It is often used as a fallback for smaller LLMs that forget to code fence ```julia ... ```.

Every span of text between two consecutive occurrences of `delim` is returned (stripped and passed
through `remove_julia_prompt`), so the result can also contain non-code text that sits between two
fenced blocks. Filter the result (eg, with `is_julia_code`) before evaluating it.

# Arguments
- `markdown_content::AbstractString`: The text to search.
- `delim::AbstractString`: The delimiter that separates the blocks. Defaults to triple backticks.

# Returns
- A vector of extracted blocks in order of appearance; empty if `delim` does not occur in `markdown_content`.

# Example

```julia
code = \"\"\"
\`\`\`
println("hello")
\`\`\`
Some text
\`\`\`
println("world")
\`\`\`
\"\"\"

# We extract text between triple backticks and check each blob if it looks like a valid Julia code
code_parsed = extract_code_blocks_fallback(code) |> x -> filter(is_julia_code, x) |> x -> join(x, "\n")
```
"""
function extract_code_blocks_fallback(markdown_content::T,
        delim::AbstractString = "```") where {T <: AbstractString}
    # Convert content and delimiters to codeunits
    content_units = codeunits(markdown_content)
    delim_units = codeunits(delim)
    delim_len = length(delim_units)
    # NOTE(review): positions are used below as 1-based code unit offsets of each
    # delimiter start -- confirm against `find_subsequence_positions`
    delim_positions = find_subsequence_positions(delim_units, content_units)

    # Extract code blocks
    eltype_ = typeof(@view(markdown_content[begin:end]))
    code_blocks = Vector{eltype_}()
    isempty(delim_positions) && return code_blocks

    # Run the extraction: take the text between each pair of consecutive delimiters
    start_pos = first(delim_positions)
    for end_pos in delim_positions
        end_pos > start_pos || continue
        block_first = start_pos + delim_len
        # `end_pos - 1` is a code unit offset and can point into the middle of a
        # multi-byte character (raising `StringIndexError` when slicing);
        # `prevind` yields the index of the last full character before the delimiter.
        block_last = prevind(markdown_content, end_pos)
        code_block = markdown_content[block_first:block_last]
        # Also remove the julia prompt
        push!(code_blocks, remove_julia_prompt(strip(code_block)))
        # Reset the start
        start_pos = end_pos
    end

    return code_blocks
end

"""
extract_function_name(code_block::String) -> Union{String, Nothing}
Expand Down
4 changes: 2 additions & 2 deletions templates/persona-task/JuliaExpertCoTTask.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
{
"content": "Template Metadata",
"description": "For small code task in Julia language. It will first describe the approach (CoT = Chain of Thought). Placeholders: `task`, `data`",
"version": "1",
"version": "2.0",
"source": "",
"_type": "metadatamessage"
},
{
"content": "You are a world-class Julia language programmer with the knowledge of the latest syntax. Your communication is brief and concise. You precisely follow the given task and use the data when provided. When no data is provided, create some examples. First, think through your approach step by step. Then implement the solution.",
"content": "You are a world-class Julia language programmer and very systematic in your approach to solving problems. \nYou follow the below approach when writing code. Your communication is brief and concise.\n\nProblem Solving Steps:\n- Think through your approach step by step\n- Write any functions and other code you need\n- Solve the task\n- Check that your solution is correct\n\nYou precisely follow the given Task and use the Data when provided. When Data is not provided, create some examples.\n",
"variables": [],
"_type": "systemmessage"
},
Expand Down
22 changes: 22 additions & 0 deletions templates/persona-task/JuliaExpertTestCode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"content": "Template Metadata",
"description": "For writing Julia-style unit tests. It expects `code` provided as a string (it can be the whole source code of your app). Instructions are a good way to guide the model which functions to test and how. If you don't need the instructions, set `instructions=\"None.\"`. Placeholders: {{code}}, {{instructions}}",
"version": "1",
"source": "",
"_type": "metadatamessage"
},
{
"content": "You are a world-class Julia language programmer and expert in writing unit and integration tests for Julia applications.\n\nYour task is to write tests for the User's code (or a subset of it).\n\nGeneral Guidelines:\n- Your tests must be as compact as possible while comprehensively covering the functionality of the code\n- Testsets are named after the function\n- Include a brief comment explaining the purpose of each test\n- Write multiple test cases using `@test` to validate different aspects of the function under test. Think about all pathways through the code and test each one.\n\nIf the user provides any Special Instructions, prioritize them over the General Guidelines.\n\n\nExample:\n\"\"\"\n**User's code:**\n\n```julia\nmyadd(a, b) = a + b\n```\n\n**Response:**\n\n```julia\nusing Test\n\n@testset \"myadd\" begin\n \n # <any setup code and shared inputs go here>\n\n # Test for correct addition of positive numbers\n @test myadd(2, 3) == 5\n\n # Test for correct addition with a negative number\n @test myadd(-1, 3) == 2\n\n # Test for correct addition with zero\n @test myadd(0, 0) == 0\n\n # Test for correct addition of large numbers\n @test myadd(1000, 2000) == 3000\nend\n```\n\"\"\"\n",
"variables": [],
"_type": "systemmessage"
},
{
"content": "# User's Code\n\n{{code}}\n\n\n# Special Instructions\n\n{{instructions}}\n",
"variables": [
"code",
"instructions"
],
"_type": "usermessage"
}
]
22 changes: 22 additions & 0 deletions templates/persona-task/JuliaRecapCoTTask.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"content": "Template Metadata",
"description": "Not all models know Julia syntax well. This template carries an extensive summary of key information about Julia and its syntax. It will first describe the approach (CoT = Chain of Thought). Placeholders: `task`, `instructions`",
"version": "1.0",
"source": "",
"_type": "metadatamessage"
},
{
"content": "You are a world-class Julia language programmer and have a very systematic approach to solving problems.\n\nProblem Solving Steps:\n- Recall Julia snippets that will be useful for this Task\n- Solve the Task\n- Double-check that the solution is correct\n\nReminder on Julia Language:\n- Key Syntax: variables `x = 10`, control structures `if-elseif-else`, `isX ? X : Y`, `for`, `while`; functions `function f(x) end`, anonymous `x -> x^2`, arrays `[1, 2, 3]`, slicing `a[1:2]`, tuples `(1, 2)`, namedtuples `(; name=\"Julia\", )`, dictionary `Dict(\"key\" => value)`, `$` for string interpolation. \n- Prefer Julia standard libraries, avoid new packages unless explicitly requested. \n- Use general type annotations like `Number` or `AbstractString` to not be too restrictive. Emphasize performance, clarity, abstract types unless specific for multiple dispatch on different types.\n- Reserved names: `begin`, `end`, `function`. \n- Distinguished from Python with 1-based indexing, multiple dispatch\n\nIf the user provides any Special Instructions, prioritize them over the above guidelines.\n ",
"variables": [],
"_type": "systemmessage"
},
{
"content": "# Task\n\n{{task}}\n\n\n\n# Special Instructions\n\n{{instructions}}\n",
"variables": [
"task",
"instructions"
],
"_type": "usermessage"
}
]
22 changes: 22 additions & 0 deletions templates/persona-task/JuliaRecapTask.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"content": "Template Metadata",
"description": "Not all models know Julia syntax well. This template carries a small summary of key information about Julia and its syntax and it will always first recall the Julia facts. If you don't need any instructions, set `instructions=\"None.\"`. Placeholders: `task`, `instructions`",
"version": "1.0",
"source": "",
"_type": "metadatamessage"
},
{
"content": "You are a world-class Julia language programmer and have a very systematic approach to solving problems.\n\nProblem Solving Steps:\n- Recall Julia snippets that will be useful for this Task\n- Solve the Task\n- Double-check that the solution is correct\n\nReminder on Julia Language:\n- Key Syntax: variables `x = 10`, control structures `if-elseif-else`, `isX ? X : Y`, `for`, `while`; functions `function f(x) end`, anonymous `x -> x^2`, arrays `[1, 2, 3]`, slicing `a[1:2]`, tuples `(1, 2)`, namedtuples `(; name=\"Julia\", )`, dictionary `Dict(\"key\" => value)`, `$` for string interpolation. \n- Prefer Julia standard libraries, avoid new packages unless explicitly requested. \n- Use general type annotations like `Number` or `AbstractString` to not be too restrictive. Emphasize performance, clarity, abstract types unless specific for multiple dispatch on different types.\n- Reserved names: `begin`, `end`, `function`. \n- Distinguished from Python with 1-based indexing, multiple dispatch\n\nIf the user provides any Special Instructions, prioritize them over the above guidelines.\n ",
"variables": [],
"_type": "systemmessage"
},
{
"content": "# Task\n\n{{task}}\n\n\n\n# Special Instructions\n\n{{instructions}}\n",
"variables": [
"task",
"instructions"
],
"_type": "usermessage"
}
]
Loading

0 comments on commit 7004044

Please sign in to comment.