Skip to content
This repository has been archived by the owner on Jul 19, 2019. It is now read-only.

Commit

Permalink
better handling of office documents per #1
Browse files Browse the repository at this point in the history
  • Loading branch information
hrbrmstr committed Mar 22, 2018
1 parent f56737d commit 9dd56f5
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 0 deletions.
32 changes: 32 additions & 0 deletions R/check-office.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
check_office <- function(hdr, path) {

  # Detect an Office Open XML document (docx / pptx / xlsx).
  #
  # hdr  : leading bytes of the file (already known by the caller to start
  #        with "PK"); compared element-wise against the byte patterns below.
  # path : path to the file; re-read here (first 4096 bytes) when the header
  #        looks like an Office package.
  #
  # Returns the matching MIME type string, or NULL when the file is not
  # recognised as one of the three Office formats.

  # Entry names expected at hdr[31:...] -- presumably the file name of the
  # first archive member in the ZIP local file header (TODO confirm).
  sig_content_types <- utf8ToInt("[Content_Types].xml") # 19 bytes
  sig_rels <- utf8ToInt("_rels/.rels")                  # 11 bytes

  looks_like_office <-
    all(sig_content_types == hdr[31:49]) || all(sig_rels == hdr[31:41])

  if (!looks_like_office) return(NULL)

  # The directory prefix of the package parts tells the formats apart, so
  # scan a larger slice of the file for each prefix.
  buf <- readBin(path, "raw", n = 4096)

  # Checked in this order; the first prefix found wins.
  dir_markers <- list(
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document" =
      utf8ToInt("word/"),
    "application/vnd.openxmlformats-officedocument.presentationml.presentation" =
      utf8ToInt("ppt/"),
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" =
      utf8ToInt("xl/")
  )

  for (mime_type in names(dir_markers)) {
    if (length(seq_in(buf, dir_markers[[mime_type]])) > 0) {
      return(mime_type)
    }
  }

  NULL

}
6 changes: 6 additions & 0 deletions R/get-content-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,15 @@ get_content_type <- function(path) {
if (all(c(0x52,0x49,0x46,0x46) == hdr[1:4])) return("audio/x-wav") # "RIFF"

if (all(c(0x50, 0x4b) == hdr[1:2])) { # "PK"

office_type <- check_office(hdr, path)
if (length(office_type) > 0) return(office_type)

guessed_name <- guess_content_type(path)
if ((length(guessed_name) == 1) && (guessed_name != "???")) return(guessed_name)

return("application/zip")

}

if (all(c(0x5a,0x4d) == hdr[1:2])) return("x-system/exe")
Expand Down
13 changes: 13 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
seq_in <- function(a, b) {

  # Find every position in `a` at which the sequence `b` occurs.
  #
  # a : vector to search in (e.g. raw bytes of a file)
  # b : vector to search for (e.g. numeric byte values); elements are
  #     compared with `==`, so raw/numeric mixes are fine
  #
  # Returns an integer vector of 1-based start indices (overlapping matches
  # included); integer(0) when there is no match, when `b` is empty, or when
  # `b` is longer than `a`.

  n_a <- length(a)
  n_b <- length(b)

  # Without this guard a pattern longer than the haystack would produce a
  # descending index range below, yielding bogus matches or a subscript error.
  if (n_b == 0L || n_b > n_a) return(integer(0))

  # Number of candidate start positions
  n_starts <- n_a - n_b + 1L

  # hits[i] stays TRUE only while a[i + k - 1] == b[k] holds for every k
  hits <- rep(TRUE, n_starts)
  for (k in seq_len(n_b)) {
    hits <- hits & (a[k:(n_starts + k - 1L)] == b[k])
  }

  which(hits)

}

0 comments on commit 9dd56f5

Please sign in to comment.