11use itertools:: Itertools ;
22use miette:: Diagnostic ;
33use rayon:: prelude:: * ;
4- use regex:: RegexSet ;
4+ use regex:: { Captures , Regex , RegexSet } ;
55use serde:: Serialize ;
66use std:: collections:: HashMap ;
77use std:: ffi:: OsStr ;
88use std:: fs:: File ;
99use std:: io;
10- use std:: ops:: RangeBounds ;
10+ use std:: ops:: { Deref , RangeBounds } ;
1111use std:: path:: { Path , PathBuf } ;
12- use std:: sync:: Arc ;
12+ use std:: sync:: { Arc , LazyLock } ;
1313use thiserror:: Error ;
1414use tree_sitter:: Language ;
1515
@@ -254,6 +254,7 @@ pub enum SourceLanguage {
254254 Java ,
255255 #[ serde( rename = "C++" ) ]
256256 Cpp ,
257+ Python ,
257258}
258259
259260impl From < SourceLanguage > for Language {
@@ -262,6 +263,7 @@ impl From<SourceLanguage> for Language {
262263 SourceLanguage :: Rust => tree_sitter_rust_orchard:: LANGUAGE . into ( ) ,
263264 SourceLanguage :: Java => tree_sitter_java:: LANGUAGE . into ( ) ,
264265 SourceLanguage :: Cpp => tree_sitter_cpp:: LANGUAGE . into ( ) ,
266+ SourceLanguage :: Python => tree_sitter_python:: LANGUAGE . into ( ) ,
265267 }
266268 }
267269}
@@ -270,12 +272,30 @@ const IDENTS_RS: &[&str] = &["debug", "info", "warn"];
/// Identifier fragments associated with logging calls in Java sources.
const IDENTS_JAVA: &[&str] = &[
    "logger",
    "log",
    "fine",
    "debug",
    "info",
    "warn",
    "trace",
];

/// Identifier fragments associated with logging calls in C++ sources.
const IDENTS_CPP: &[&str] = &[
    "debug",
    "info",
    "warn",
    "trace",
];
272274
/// Identifier fragments associated with logging calls in Python sources.
const IDENTS_PYTHON: &[&str] = &[
    "debug",
    "info",
    "warn",
    "trace",
];
276+
277+ static RUST_PLACEHOLDER_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
278+ Regex :: new ( r#"\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"# ) . unwrap ( )
279+ } ) ;
280+
281+ static JAVA_PLACEHOLDER_REGEX : LazyLock < Regex > =
282+ LazyLock :: new ( || Regex :: new ( r#"\{.*}|\\\{(.*)}"# ) . unwrap ( ) ) ;
283+
284+ static CPP_PLACEHOLDER_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
285+ Regex :: new ( r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]|\{(?:([a-zA-Z_][a-zA-Z0-9_.]*)|(\d+))?\s*(?::[^}]*)?}"# ) . unwrap ( )
286+ } ) ;
287+
288+ static PYTHON_PLACEHOLDER_REGEX : LazyLock < Regex > = LazyLock :: new ( || {
289+ Regex :: new ( r#"%[-+ #0]*\d*(?:\.\d+)?[hlLzjt]*[diuoxXfFeEgGaAcspn%]"# ) . unwrap ( )
290+ } ) ;
291+
273292impl SourceLanguage {
274293 pub fn as_str ( & self ) -> & ' static str {
275294 match self {
276295 SourceLanguage :: Rust => "Rust" ,
277296 SourceLanguage :: Java => "Java" ,
278297 SourceLanguage :: Cpp => "C++" ,
298+ SourceLanguage :: Python => "Python" ,
279299 }
280300 }
281301
@@ -284,6 +304,7 @@ impl SourceLanguage {
284304 Some ( "rs" ) => Some ( Self :: Rust ) ,
285305 Some ( "java" ) => Some ( Self :: Java ) ,
286306 Some ( "h" | "hh" | "hpp" | "hxx" | "tpp" | "cc" | "cpp" | "cxx" ) => Some ( Self :: Cpp ) ,
307+ Some ( "py" ) => Some ( Self :: Python ) ,
287308 None | Some ( _) => None ,
288309 }
289310 }
@@ -339,6 +360,20 @@ impl SourceLanguage {
339360 )
340361 "#
341362 }
363+ SourceLanguage :: Python => {
364+ r#"
365+ (
366+ (expression_statement
367+ (call
368+ function: (_) @func
369+ arguments: (argument_list .
370+ (string) @args
371+ )
372+ )
373+ )
374+ )
375+ "#
376+ }
342377 }
343378 }
344379
@@ -347,7 +382,34 @@ impl SourceLanguage {
347382 SourceLanguage :: Rust => IDENTS_RS ,
348383 SourceLanguage :: Java => IDENTS_JAVA ,
349384 SourceLanguage :: Cpp => IDENTS_CPP ,
385+ SourceLanguage :: Python => IDENTS_PYTHON ,
386+ }
387+ }
388+
389+ fn get_placeholder_regex ( & self ) -> & ' static Regex {
390+ match self {
391+ SourceLanguage :: Rust => RUST_PLACEHOLDER_REGEX . deref ( ) ,
392+ SourceLanguage :: Java => JAVA_PLACEHOLDER_REGEX . deref ( ) ,
393+ SourceLanguage :: Cpp => CPP_PLACEHOLDER_REGEX . deref ( ) ,
394+ SourceLanguage :: Python => PYTHON_PLACEHOLDER_REGEX . deref ( ) ,
395+ }
396+ }
397+
398+ fn captures_to_format_arg ( & self , caps : & Captures ) -> FormatArgument {
399+ for ( index, cap) in caps. iter ( ) . skip ( 1 ) . enumerate ( ) {
400+ if let Some ( cap) = cap {
401+ return match ( self , index) {
402+ ( SourceLanguage :: Rust | SourceLanguage :: Java | SourceLanguage :: Cpp , 0 ) => {
403+ FormatArgument :: Named ( cap. as_str ( ) . to_string ( ) )
404+ }
405+ ( SourceLanguage :: Rust | SourceLanguage :: Cpp , 1 ) => {
406+ FormatArgument :: Positional ( cap. as_str ( ) . parse ( ) . unwrap ( ) )
407+ }
408+ _ => unreachable ! ( ) ,
409+ } ;
410+ }
350411 }
412+ FormatArgument :: Placeholder
351413 }
352414}
353415
@@ -513,7 +575,7 @@ pub fn extract_logging_guarded(sources: &[CodeSource], guard: &WorkGuard) -> Vec
513575 for result in results {
514576 // println!("node.kind()={:?} range={:?}", result.kind, result.range);
515577 match result. kind . as_str ( ) {
516- "string_literal" => {
578+ "string_literal" | "string" => {
517579 if let Some ( src_ref) = SourceRef :: new ( code, result) {
518580 patterns. push ( src_ref. pattern . clone ( ) ) ;
519581 matched. push ( src_ref) ;
@@ -852,4 +914,32 @@ fn main() {
852914 } , ]
853915 ) ;
854916 }
917+
// Python fixture for the extraction test below: one %-style logging call
// with a trailing argument, two f-string calls, and one raw triple-quoted
// f-string call that spans two lines.
918+ const PYTHON_SOURCE : & str = r#"
919+ def main(args):
920+ logger.info("foo %s \N{greek small letter pi}", test_var)
921+ logging.info(f'Hello, {args[1]}!')
922+ logger.warning(f"warning message:\nlow disk space")
923+ logger.info(rf"""info message:
924+ processing started -- {args[0]}""")
925+ "# ;
926+
/// Extracts the log statements from `PYTHON_SOURCE`, snapshots them, and
/// checks that the first statement's `%s` resolves to `test_var` with the
/// value taken from the sample log line.
#[test]
fn test_basic_python() {
    let log_ref = LogRef::new("foo bar π");
    let code = CodeSource::from_string(&Path::new("in-mem.py"), PYTHON_SOURCE);

    // Only one source is supplied, so the last result is the one of interest.
    let mut extracted = extract_logging(&[code], &ProgressTracker::new());
    let src_refs = extracted.pop().unwrap().log_statements;
    assert_yaml_snapshot!(src_refs);

    let expected = vec![VariablePair {
        expr: "test_var".to_string(),
        value: "bar".to_string(),
    }];
    let vars = extract_variables(&log_ref, &src_refs[0]);
    assert_eq!(vars, expected);
}
855945}
0 commit comments