Skip to content

Commit

Permalink
add automatic language detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Kl4rry committed Jun 27, 2024
1 parent af89b6c commit d57b798
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 2 deletions.
13 changes: 11 additions & 2 deletions crates/ferrite-core/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ use super::{
indent::Indentation,
language::{get_language_from_path, syntax::Syntax},
};
use crate::{clipboard, event_loop_proxy::EventLoopProxy, keymap::LineMoveDir};
use crate::{
clipboard, event_loop_proxy::EventLoopProxy, keymap::LineMoveDir,
language::detect::detect_language,
};

pub mod case;
pub mod encoding;
Expand Down Expand Up @@ -212,7 +215,13 @@ impl Buffer {
syntax.update_text(rope.clone());
}

// TODO add parsing of shebang and other heuristics
if let Some(language) = detect_language(syntax.get_language_name().as_deref(), rope.clone())
{
if let Err(err) = syntax.set_language(language) {
tracing::error!("Error setting language: {err}");
}
syntax.update_text(rope.clone());
}

let name = path.file_name().unwrap().to_string_lossy().into();

Expand Down
1 change: 1 addition & 0 deletions crates/ferrite-core/src/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use tree_sitter::Language;

use self::syntax::HighlightConfiguration;

pub mod detect;
pub mod syntax;

#[derive(Clone)]
Expand Down
66 changes: 66 additions & 0 deletions crates/ferrite-core/src/language/detect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use ropey::{Rope, RopeSlice};

/// Refines an initial language guess by inspecting the text of `content`.
///
/// * `inital_guess` - language name already chosen for the buffer, if any.
/// * `content` - the full text of the buffer.
///
/// When the initial guess is `"c"` and more than three of the C++ markers
/// below are reported by `detect_markers`, `Some("cpp")` is returned.
/// In every other case the result of `detect_shebang` is returned as is.
pub fn detect_language(inital_guess: Option<&str>, content: Rope) -> Option<&'static str> {
    /// Substrings treated as hints that a file guessed as C is really C++.
    const CPP_MARKERS: [&str; 17] = [
        "public",
        "protected",
        "private",
        "std::",
        "dynamic_cast",
        "static_cast",
        "reinterpret_cast",
        "#include <iostream>",
        "#include <vector>",
        "#include <string>",
        "class",
        "throw",
        "catch",
        "try",
        "nullptr",
        "const&",
        "final",
    ];
    /// The marker count must be strictly greater than this to switch to C++.
    const CPP_MARKER_THRESHOLD: usize = 3;

    tracing::info!("inital_guess: {inital_guess:?}");

    // `&&` short-circuits, so the marker scan only runs for a "c" guess.
    let looks_like_cpp = matches!(inital_guess, Some("c"))
        && detect_markers(content.slice(..), &CPP_MARKERS) > CPP_MARKER_THRESHOLD;
    if looks_like_cpp {
        return Some("cpp");
    }

    detect_shebang(content.slice(..))
}

/// Guesses a language from a `#!` interpreter line at the top of `content`.
///
/// Only the first line is examined, and only as far as it lies within the
/// first 1000 chars of `content`. Returns `None` when `content` has no first
/// line, when that line is not a shebang, or when the interpreter is not one
/// of the recognised ones.
fn detect_shebang(content: RopeSlice) -> Option<&'static str> {
    let first_line = content
        .slice(..content.len_chars().min(1000))
        .get_line(0)?
        .to_string();

    shebang_language(&first_line)
}

/// Maps a single shebang line to a language name.
///
/// The line must start with `#!`; a line that merely mentions an interpreter
/// name (for example a comment containing the word "python") is rejected.
/// The interpreter is identified by the final path component of the command,
/// so it is recognised regardless of its install directory. When the command
/// is `env`, the first following word that is neither an option (`-S`, `-i`,
/// ...) nor a `NAME=value` assignment is taken as the interpreter.
fn shebang_language(first_line: &str) -> Option<&'static str> {
    let command = first_line.strip_prefix("#!")?;
    let mut words = command.split_whitespace();

    let mut interpreter = words.next()?;
    if program_name(interpreter) == "env" {
        interpreter = words.find(|word| !word.starts_with('-') && !word.contains('='))?;
    }

    let program = program_name(interpreter);
    if program.starts_with("python") {
        // Covers `python`, `python2`, `python3`, `python3.11`, ...
        Some("python")
    } else if matches!(program, "bash" | "sh" | "zsh") {
        // POSIX sh and zsh scripts are reported as "bash".
        Some("bash")
    } else {
        None
    }
}

/// Returns the final `/`-separated component of `path`.
fn program_name(path: &str) -> &str {
    path.rsplit('/').next().unwrap_or(path)
}

/// Counts how many of `markers` occur in the beginning of `content`.
///
/// Only the first 1000 chars of `content` (or all of it, if shorter) are
/// searched. Matching is plain substring search, and each marker contributes
/// at most one to the total no matter how often it appears.
fn detect_markers(content: RopeSlice, markers: &[&str]) -> usize {
    let prefix_len = content.len_chars().min(1000);
    let head = content.slice(..prefix_len).to_string();

    markers
        .iter()
        .filter(|marker| head.contains(**marker))
        .count()
}

0 comments on commit d57b798

Please sign in to comment.