From a63dce2c468f3ab252a41486704dfc50146e5a62 Mon Sep 17 00:00:00 2001 From: Axel Kappel <69117984+Kl4rry@users.noreply.github.com> Date: Thu, 27 Jun 2024 21:35:59 +0200 Subject: [PATCH] add automatic language detection --- crates/ferrite-core/src/buffer.rs | 13 ++++- crates/ferrite-core/src/language.rs | 1 + crates/ferrite-core/src/language/detect.rs | 66 ++++++++++++++++++++++ thing.h | 14 +++++ 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 crates/ferrite-core/src/language/detect.rs create mode 100644 thing.h diff --git a/crates/ferrite-core/src/buffer.rs b/crates/ferrite-core/src/buffer.rs index 9f2ba61..d3d9277 100644 --- a/crates/ferrite-core/src/buffer.rs +++ b/crates/ferrite-core/src/buffer.rs @@ -22,7 +22,10 @@ use super::{ indent::Indentation, language::{get_language_from_path, syntax::Syntax}, }; -use crate::{clipboard, event_loop_proxy::EventLoopProxy, keymap::LineMoveDir}; +use crate::{ + clipboard, event_loop_proxy::EventLoopProxy, keymap::LineMoveDir, + language::detect::detect_language, +}; pub mod case; pub mod encoding; @@ -212,7 +215,13 @@ impl Buffer { syntax.update_text(rope.clone()); } - // TODO add parsing of shebang and other heuristics + if let Some(language) = detect_language(syntax.get_language_name().as_deref(), rope.clone()) + { + if let Err(err) = syntax.set_language(language) { + tracing::error!("Error setting language: {err}"); + } + syntax.update_text(rope.clone()); + } let name = path.file_name().unwrap().to_string_lossy().into(); diff --git a/crates/ferrite-core/src/language.rs b/crates/ferrite-core/src/language.rs index eb4a682..0cf961c 100644 --- a/crates/ferrite-core/src/language.rs +++ b/crates/ferrite-core/src/language.rs @@ -5,6 +5,7 @@ use tree_sitter::Language; use self::syntax::HighlightConfiguration; +pub mod detect; pub mod syntax; #[derive(Clone)] diff --git a/crates/ferrite-core/src/language/detect.rs b/crates/ferrite-core/src/language/detect.rs new file mode 100644 index 0000000..80688c8 --- 
/dev/null
+++ b/crates/ferrite-core/src/language/detect.rs
@@ -0,0 +1,123 @@
+//! Content-based language detection.
+//!
+//! Refines an initial language guess by sniffing the first characters of a buffer
+//! for C++-only tokens and for a shebang line.
+
+use ropey::{Rope, RopeSlice};
+
+/// Number of leading characters of the buffer that the heuristics inspect.
+const SNIFF_CHARS: usize = 1000;
+
+/// A buffer guessed to be C is reclassified as C++ when more than this many entries
+/// of [`CPP_MARKERS`] occur in the sniffed prefix.
+const CPP_MARKER_THRESHOLD: usize = 3;
+
+/// Tokens that hint at C++ rather than C. Every entry is distinct so that a single
+/// construct cannot be counted more than once.
+const CPP_MARKERS: &[&str] = &[
+    "public",
+    "protected",
+    "private",
+    "std::",
+    "dynamic_cast",
+    "static_cast",
+    "reinterpret_cast",
+    "#include <iostream>",
+    "#include <vector>",
+    "#include <string>",
+    "class",
+    "throw",
+    "catch",
+    "try",
+    "nullptr",
+    "const&",
+    "final",
+];
+
+/// Refines `inital_guess` using the content of the buffer.
+///
+/// When the initial guess is `"c"` and more than [`CPP_MARKER_THRESHOLD`] C++ markers
+/// are found, `"cpp"` is returned. Otherwise the language named by the shebang on the
+/// first line is returned, if there is one.
+///
+/// Returns `None` when neither heuristic produces a result.
+pub fn detect_language(inital_guess: Option<&str>, content: Rope) -> Option<&'static str> {
+    tracing::info!("inital_guess: {inital_guess:?}");
+    let text = content.slice(..);
+
+    if inital_guess == Some("c") && detect_markers(text, CPP_MARKERS) > CPP_MARKER_THRESHOLD {
+        return Some("cpp");
+    }
+
+    detect_shebang(text)
+}
+
+/// Returns the language named by the shebang on the first line of `content`, if any.
+fn detect_shebang(content: RopeSlice) -> Option<&'static str> {
+    let first_line = content
+        .slice(..content.len_chars().min(SNIFF_CHARS))
+        .get_line(0)?
+        .to_string();
+    language_for_shebang(&first_line)
+}
+
+/// Maps a shebang line such as `#!/usr/bin/env python3` to a language name.
+///
+/// The line must start with `#!`; an interpreter name that merely appears somewhere
+/// on the first line (for example in a comment) is not a shebang and yields `None`.
+fn language_for_shebang(line: &str) -> Option<&'static str> {
+    let mut words = line.strip_prefix("#!")?.split_whitespace();
+    let mut interpreter = basename(words.next()?);
+    if interpreter == "env" {
+        // `env` runs the first argument that is neither an option (`-S`, `-i`, ...)
+        // nor a `NAME=value` assignment.
+        let program = words.find(|word| !word.starts_with('-') && !word.contains('='))?;
+        interpreter = basename(program);
+    }
+
+    match interpreter {
+        // Covers `python`, `python2`, `python3` and versioned names like `python3.12`.
+        name if name.starts_with("python") => Some("python"),
+        "bash" | "sh" | "zsh" => Some("bash"),
+        _ => None,
+    }
+}
+
+/// Returns the final component of a `/`-separated path.
+fn basename(path: &str) -> &str {
+    path.rsplit('/').next().unwrap_or(path)
+}
+
+/// Counts how many of `markers` occur as tokens in the first [`SNIFF_CHARS`]
+/// characters of `content`. Each marker is counted at most once.
+fn detect_markers(content: RopeSlice, markers: &[&str]) -> usize {
+    let start = content
+        .slice(..content.len_chars().min(SNIFF_CHARS))
+        .to_string();
+    markers
+        .iter()
+        .filter(|marker| contains_token(&start, marker))
+        .count()
+}
+
+/// Returns `true` if `marker` occurs in `haystack` as a token of its own.
+///
+/// Where the marker starts or ends with an identifier character, the neighbouring
+/// character in `haystack` must not be one, so that `try` does not match inside
+/// `retry` and `class` does not match inside `classify`.
+fn contains_token(haystack: &str, marker: &str) -> bool {
+    let guard_left = marker.chars().next().map_or(false, is_ident_char);
+    let guard_right = marker.chars().next_back().map_or(false, is_ident_char);
+
+    haystack.match_indices(marker).any(|(start, found)| {
+        let before = haystack[..start].chars().next_back();
+        let after = haystack[start + found.len()..].chars().next();
+        !(guard_left && before.map_or(false, is_ident_char))
+            && !(guard_right && after.map_or(false, is_ident_char))
+    })
+}
+
+/// Returns `true` for characters that can be part of a C or C++ identifier.
+fn is_ident_char(c: char) -> bool {
+    c.is_ascii_alphanumeric() || c == '_'
+}
diff --git a/thing.h b/thing.h
new file mode 100644
index 0000000..11f6205
--- /dev/null
+++ b/thing.h
@@ -0,0 +1,14 @@
+
+
+#include <iostream>
+#include <vector>
+#include <string>
+
+class Thing {
+    public:
+        int thing;
+}
+
+int main() {
+    std::cout << "hello" << std::endl;
+}
\ No newline at end of file