Skip to content

Commit

Permalink
Better Stata parsing (#13)
Browse files Browse the repository at this point in the history
* adjusting regex and adding Stata code husks

* data step now handles Stata do/run imports... still need to do work to get it to print though

* adjusting comments, refreshing output

* corrections as suggested via code review

* appending to changelog and adding author

* adjusting to better handle infinite do-loops

* regen output

* adjusting code to better handle source line enumeration

* regen output

* adding test case

* adjusting regen for new test

* uploading from RAS, an additional regex adjustment

* adjusting regex to make it more clear

* regen output

* Add to changelog
  • Loading branch information
rbisewski authored and Righolt committed Apr 12, 2018
1 parent 2d2fc70 commit ef34dd9
Show file tree
Hide file tree
Showing 10 changed files with 161 additions and 112 deletions.
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
### Changed
### Removed

## [1.0.0] - 2017-12-14
## [1.0.1] - 2018-04-12
### Added
- Support for alternative Stata file imports via `run` and `do`

## 1.0.0 - 2017-12-14
### Added
- Initial version of code
- Initial version of demos

[Unreleased]: https://github.com/VaccineAndDrugEvaluationCentre/code-diary-sas/compare/v1.0.0...HEAD
[Unreleased]: https://github.com/VaccineAndDrugEvaluationCentre/code-diary-sas/compare/v1.0.1...HEAD
[1.0.1]: https://github.com/VaccineAndDrugEvaluationCentre/code-diary-sas/compare/v1.0.0...v1.0.1
114 changes: 61 additions & 53 deletions demo/output-coder.md
Original file line number Diff line number Diff line change
@@ -1,56 +1,64 @@
% Title for output document, just an example (v 1.2.3)
% Christiaan Righolt, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)
% December 7, 2017
% Christiaan Righolt, Robert Bisewski, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)
% April 11, 2018

# Scripts/macros used for project
* 1: C:\Users\righoltc\Documents\GitHub\code-diary-sas\demo\project_main.sas
* 1.1: &DEMO_ROOT.project_script.sas
* 1.2: &DEMO_ROOT.generate_documentation.sas
* 1.2.1: &MACRO_ROOT.code_diary.sas
* 1.2.2: &MACRO_ROOT.convert_markdown_to_html.sas
* 1.s1: &DEMO_ROOT.project_stata.do

# Task list
* 1.1:64 Insert this code from reference Qwerty (Nature, 2345)

# Exclusion criteria

## Time
* 1.1:47 Exclude any record before 1960

## Person
* 1.1:40 Exclude Martians from analysis
* 1.1:41 Exclude terrestrial gods.

# No keyword
* 1.1:5 Test comment with special characters:"`&
* 1.1:14 Example of a comment without keyword

# Main
* 1:6 This document is generated as an example output

# Def
* 1:7 The answer to life the universe and everything = 42

# Test
* 1.1:11 Code Diary comment inside block comments
* 1.1:18 Same line Code Diary block comment B
* 1.1:74 Longer, multiple line comment test

# Statistics
* 1.1:52 Use the fanciest order of tests
1.1:53 1. The Atlantic test procedures
1.1:54 2. The Pacific test procedures
1.1:55 3. The Arctic test procedures
* 1.1:60 We use alpha=0.05 in all tests

# Analysis
* 1.1:67 Use the Milkyway default analysis for grouping of people
1.1:68 The Milkyway has stars, this is a test for a single-line two star that is appended to the above in markdown / HTML

# Regex
* 1.1:70 Essential definitions for the text-parsing regex.

# Stata
* 1.s1:1 Just an example of Stata comments
* 1.s1:8 A one-line Stata command
* 1: C:\Users\bisewskr\development\code-diary-sas\demo\project_main.sas
* 1.1: &demo_root.project_script.sas
* 1.2: &demo_root.generate_documentation.sas
* 1.2.1: &macro_root.code_diary.sas
* 1.2.2: &macro_root.convert_markdown_to_html.sas
* 1.s1: &demo_root.project_stata.do
* 1.s1.s16: &demo_root\stata_husk_a.do
* 1.s1.s17: &demo_root\stata_husk_b.do
* 1.s1.s18: &demo_root\stata_husk_c.do

# Task list
* 1.1:64 Insert this code from reference Qwerty (Nature, 2345)

# Exclusion criteria

## Time
* 1.1:47 Exclude any record before 1960

## Person
* 1.1:40 Exclude Martians from analysis
* 1.1:41 Exclude terrestrial gods.

# No keyword
* 1.1:5 Test comment with special characters:"`&
* 1.1:14 Example of a comment without keyword

# Main
* 1:6 This document is generated as an example output

# Def
* 1:7 The answer to life the universe and everything = 42

# Test
* 1.1:11 Code Diary comment inside block comments
* 1.1:18 Same line Code Diary block comment B
* 1.1:74 Longer, multiple line comment test

# Statistics
* 1.1:52 Use the fanciest order of tests
1.1:53 1. The Atlantic test procedures
1.1:54 2. The Pacific test procedures
1.1:55 3. The Arctic test procedures
* 1.1:60 We use alpha=0.05 in all tests

# Analysis
* 1.1:67 Use the Milkyway default analysis for grouping of people
1.1:68 The Milkyway has stars, this is a test for a single-line two star that is appended to the above in markdown / HTML

# Regex
* 1.1:70 Essential definitions for the text-parsing regex.

# Stata
* 1.s1:6 Just an example of Stata comments
* 1.s1:13 A one-line Stata command
* 1.s1:15 Test the include/run/do parsing
* 1.s1.s16:5 This file husk does nothing except help with testing the `include` call...
* 1.s1.s17:1 This file husk does nothing except help with testing the `run` call...
* 1.s1.s18:1 This file husk does nothing except help with testing the `do` call...

8 changes: 6 additions & 2 deletions demo/output-for-all.htm
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
<body>
<div id="header">
<h1 class="title-data">Title for output document, just an example (v 1.2.3)</h1>
<h2 class="title-data">Christiaan Righolt, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)</h2>
<h2 class="title-data">December 7, 2017</h2>
<h2 class="title-data">Christiaan Righolt, Robert Bisewski, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)</h2>
<h2 class="title-data">April 11, 2018</h2>
</div>
<div id="TOC">
<ul class="toc">
Expand Down Expand Up @@ -79,6 +79,10 @@ <h1 id="stata43">Stata</h1>
<ul>
<li>Just an example of Stata comments</li>
<li>A one-line Stata command</li>
<li>Test the include/run/do parsing</li>
<li>This file husk does nothing except help with testing the `include` call...</li>
<li>This file husk does nothing except help with testing the `run` call...</li>
<li>This file husk does nothing except help with testing the `do` call...</li>
</ul>
</body>
</html>
71 changes: 38 additions & 33 deletions demo/output-for-all.md
Original file line number Diff line number Diff line change
@@ -1,45 +1,50 @@
% Title for output document, just an example (v 1.2.3)
% Christiaan Righolt, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)
% December 7, 2017
% Christiaan Righolt, Robert Bisewski, Barret Monchka, Salah Mahmud; Vaccine and Drug Evaluation Centre (VDEC)
% April 11, 2018

# Exclusion criteria
# Exclusion criteria

## Time
* Exclude any record before 1960
## Time
* Exclude any record before 1960

## Person
* Exclude Martians from analysis
* Exclude terrestrial gods.
## Person
* Exclude Martians from analysis
* Exclude terrestrial gods.

# No keyword
* Test comment with special characters:"`&
* Example of a comment without keyword
# No keyword
* Test comment with special characters:"`&
* Example of a comment without keyword

# Main
* This document is generated as an example output
# Main
* This document is generated as an example output

# Def
* The answer to life the universe and everything = 42
# Def
* The answer to life the universe and everything = 42

# Test
* Code Diary comment inside block comments
* Same line Code Diary block comment B
* Longer, multiple line comment test
# Test
* Code Diary comment inside block comments
* Same line Code Diary block comment B
* Longer, multiple line comment test

# Statistics
* Use the fanciest order of tests
1. The Atlantic test procedures
2. The Pacific test procedures
3. The Arctic test procedures
* We use alpha=0.05 in all tests
# Statistics
* Use the fanciest order of tests
1. The Atlantic test procedures
2. The Pacific test procedures
3. The Arctic test procedures
* We use alpha=0.05 in all tests

# Analysis
* Use the Milkyway default analysis for grouping of people
The Milkyway has stars, this is a test for a single-line two star that is appended to the above in markdown / HTML
# Analysis
* Use the Milkyway default analysis for grouping of people
The Milkyway has stars, this is a test for a single-line two star that is appended to the above in markdown / HTML

# Regex
* Essential definitions for the text-parsing regex.
# Regex
* Essential definitions for the text-parsing regex.

# Stata
* Just an example of Stata comments
* A one-line Stata command
* Test the include/run/do parsing
* This file husk does nothing except help with testing the `include` call...
* This file husk does nothing except help with testing the `run` call...
* This file husk does nothing except help with testing the `do` call...

# Stata
* Just an example of Stata comments
* A one-line Stata command
2 changes: 1 addition & 1 deletion demo/project_main.sas
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
@main :title Title for output document, just an example
@main :authors Christiaan Righolt, Barret Monchka, Salah Mahmud
@main :authors Christiaan Righolt, Robert Bisewski, Barret Monchka, Salah Mahmud
@main :org Vaccine and Drug Evaluation Centre (VDEC)
@main :version 1.2.3
@main This document is generated as an example output
Expand Down
9 changes: 9 additions & 0 deletions demo/project_stata.do
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/*
* @stata Edit this to reflect the current directory
*/
local demo_root C:\Users\bisewskr\development\code-diary-sas\demo

**@stata Just an example of Stata comments;

/*
Expand All @@ -7,3 +12,7 @@ using the same notation as in SAS.

**@stata A one-line Stata command;

**@stata Test the include/run/do parsing;
include `demo_root'\stata_husk_a.do
run `demo_root'\stata_husk_b.do
do `demo_root'\stata_husk_c.do
6 changes: 6 additions & 0 deletions demo/stata_husk_a.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// this comment exists to test whether the regex is avoiding the search of commented code
//
// global F8 "do "P:\commond\source\main.do""

**@stata This file husk does nothing except help with testing the `include` call...;
di "attempting to call an include"
2 changes: 2 additions & 0 deletions demo/stata_husk_b.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
**@stata This file husk does nothing except help with testing the `run` call...;
di "attempting to call a run"
2 changes: 2 additions & 0 deletions demo/stata_husk_c.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
**@stata This file husk does nothing except help with testing the `do` call...;
di "attempting to call a do"
50 changes: 29 additions & 21 deletions source/code_diary.sas
Original file line number Diff line number Diff line change
Expand Up @@ -807,13 +807,15 @@ Copyright (c) 2016 Vaccine and Drug Evaluation Centre, Winnipeg.
%let prx_grab_include_file = 's/(.*include ")(.+)(".*)/$2/'; * Grabs the included script name;
data _includes_&curr_script_no_text.;
set _m_ds_current_file_content;
source_line = lowcase(source_line);
if prxmatch("/.*include.*\.sas.*/", source_line);
run;
data _includes_&curr_script_no_text.;
set _includes_&curr_script_no_text.;

length script_no $&len_script_no.;
length script $&len_script.;

* If you update this line, ALSO UPDATE THE SECOND PART OF THE STATEMENT ACCORDINGLY!!! Otherwise that line counts as an include;
where (lowcase(source_line) like '%include%.sas%') and not (source_line like '%where lowcase(source_line)%');

script_no = ("&curr_script_no." || "." || strip(put(_N_, &len_script_no..)));
script = prxchange(&prx_grab_include_file., -1, source_line);

Expand All @@ -829,37 +831,49 @@ Copyright (c) 2016 Vaccine and Drug Evaluation Centre, Winnipeg.
%if "&input_file_type." = "sas" %then %do;
data _in_stata_&curr_script_no_text.;
set _m_ds_current_file_content;
source_line = lowcase(source_line);
if prxmatch("/x .*stata.*do.*\.do.*/", source_line);
run;
data _in_stata_&curr_script_no_text.;
set _in_stata_&curr_script_no_text.;

length script_no $&len_script_no.;
length script $&len_script.;

* If you update this line, ALSO UPDATE THE SECOND PART OF THE STATEMENT ACCORDINGLY!!! Otherwise that line counts as an include;
where (lowcase(source_line) like '%x %stata%do%.do%') and not (source_line like '%where lowcase(source_line)%');


script_no = ("&curr_script_no." || ".s" || strip(put(_N_, &len_script_no..)));
script = prxchange(&prx_grab_stata_file., -1, source_line);

drop line_no source_line;
run;
%end;


* Regex to extract the file path from Stata include/run/do calls -- the first two forms below are parsed and the third is deliberately skipped;
*;
* 1) parse --> do /path/to/file.do;
* 2) parse --> do "/path/to/file.do";
* 3) avoid --> global F8 "do "P:\project_name\source\main.do"";
%let prx_grab_stata_file = 's/^[\s\/\*]*(include|run|do)"?[ \t]+"?([^"]+\.do)"*/$2/';

* Regex to rewrite Stata local macro references as SAS macro variable references;
%let prx_stata_to_sas_macro = "s/`(\w+)'/&$1/";

* Find stata files called from stata;
%let prx_grab_stata_file = 's/(.*include )(.+\.do)(.*)/$2/'; * Grabs the included script name;
%let prx_stata_to_sas_macro = "s/(`)(\w+)(')/&$2/"; * Grabs the included script name;
%if "&input_file_type." = "do" %then %do;
data _in_stata_&curr_script_no_text.;
set _m_ds_current_file_content;

length script_no $&len_script_no.;
length script $&len_script.;

* If you update this line, ALSO UPDATE THE SECOND PART OF THE STATEMENT ACCORDINGLY!!! Otherwise that line counts as an include;
where (lowcase(source_line) like '%include%.do%') and not (source_line like '%where lowcase(source_line)%');


* select only the source lines of interest;
source_line = lowcase(source_line);

if prxmatch(&prx_grab_stata_file, source_line);

script_no = ("&curr_script_no." || ".s" || strip(put(_N_, &len_script_no..)));
script = prxchange(&prx_grab_stata_file., -1, source_line);
script = prxchange(&prx_stata_to_sas_macro., -1, script);

drop line_no source_line;
run;
%end;
Expand Down Expand Up @@ -893,12 +907,6 @@ Copyright (c) 2016 Vaccine and Drug Evaluation Centre, Winnipeg.
run;

%end;

%else %if ("&input_file." ~= "C:\dir\file.sas") %then %do;
* Throw warning when it is not the "detected script" in this file ();
%put WARNING: The included file &input_file. does not exist.;
%end;

%mend;

* Read files line-by-line and returns line numbers with text;
Expand Down

0 comments on commit ef34dd9

Please sign in to comment.