diff --git a/R/getCategory.R b/R/getCategory.R index 706a718..e3efadc 100644 --- a/R/getCategory.R +++ b/R/getCategory.R @@ -3,8 +3,12 @@ #' @param fresh get data from online. Default is FALSE using cached built-in data. #' @export getCategory <- function(fresh = FALSE) { + news_category(fresh) +} + +news_category <- function(fresh = FALSE) { if (!fresh) { - return(news_category) + return(news_category_data) } mcate <- getMainCategory() cate <- list() diff --git a/R/getComment.R b/R/getComment.R index 0ed114d..44c4416 100644 --- a/R/getComment.R +++ b/R/getComment.R @@ -17,7 +17,7 @@ getComment <- function(turl, count = 10, type = c("df", "list")) { - get_comment(turl, count, type) + news_comment(turl, count, type) } #' Get All Comment @@ -37,13 +37,13 @@ getComment <- function(turl, #' getAllComment("https://n.news.naver.com/mnews/article/214/0001195110") #' } getAllComment <- function(turl) { - get_comment(turl, "all", "df") + news_comment(turl, "all", "df") } #' @importFrom purrr when #' @importFrom httr2 req_perform resp_body_string #' @importFrom jsonlite fromJSON -get_comment <- function(turl, +news_comment <- function(turl, count = 10, type = c("df", "list")) { . 
<- NULL diff --git a/R/getCommentHistory.R b/R/getCommentHistory.R index 1bbf694..049163e 100644 --- a/R/getCommentHistory.R +++ b/R/getCommentHistory.R @@ -20,7 +20,7 @@ getCommentHistory <- function(turl, commentNo, count = 10, type = c("df", "list")) { - get_comment_history(turl, commentNo, count, type) + news_comment_history(turl, commentNo, count, type) } #' Get All Comment History @@ -39,12 +39,12 @@ getCommentHistory <- function(turl, getAllCommentHistory <- function(turl, commentNo) { - get_comment_history(turl, commentNo, "all", "df") + news_comment_history(turl, commentNo, "all", "df") } #' @importFrom purrr when #' @importFrom httr2 req_perform -get_comment_history <- function(turl, +news_comment_history <- function(turl, commentNo, count = 10, type = c("df", "list")) { diff --git a/R/getContent.R b/R/getContent.R index 9af9c0d..d1f924d 100644 --- a/R/getContent.R +++ b/R/getContent.R @@ -10,10 +10,22 @@ #' @importFrom rvest html_nodes html_text html_attr #' @examples #' \dontrun{ -#' getContent("https://n.news.naver.com/mnews/article/214/0001195110?sid=103") +#' getContent("https://n.news.naver.com/mnews/article/214/0001195110?sid=103") #' } +getContent <- function(turl, + col = c("url", + "original_url", + "section", + "datetime", + "edittime", + "press", + "title", + "body", + "value")) { + news_content(turl, col) +} -getContent <- +news_content <- function(turl, col = c("url", "original_url", @@ -65,13 +77,13 @@ getContent <- } } if (value) { - original_url <- getOriginalUrl(html_obj) - title <- getContentTitle(html_obj) - datetime <- getContentDatetime(html_obj) - edittime <- getContentEditDatetime(html_obj) - press <- getContentPress(html_obj) - body <- getContentBody(html_obj) - section <- getSection(turl) + original_url <- news_content_original_url(html_obj) + title <- news_content_title(html_obj) + datetime <- news_content_datetime(html_obj) + edittime <- news_content_edit_datetime(html_obj) + press <- news_content_press(html_obj) + body <- 
news_content_body(html_obj) + section <- news_content_section(turl) } if (length(edittime) == 0) { @@ -91,7 +103,7 @@ getContent <- return(newsInfo[, col]) } -getContentTitle <- +news_content_title <- function(html_obj, title_node_info = "h2.media_end_head_headline", title_attr = "") { @@ -104,7 +116,7 @@ getContentTitle <- } -getContentDatetime <- +news_content_datetime <- function(html_obj, datetime_node_info = "span._ARTICLE_DATE_TIME", datetime_attr = "data-date-time") { @@ -116,7 +128,7 @@ getContentDatetime <- as.POSIXct(datetime, tz = "Asia/Seoul") } -getContentEditDatetime <- +news_content_edit_datetime <- function(html_obj, datetime_node_info = "span._ARTICLE_MODIFY_DATE_TIME", datetime_attr = "data-modify-date-time") { @@ -128,7 +140,7 @@ getContentEditDatetime <- as.POSIXct(datetime, tz = "Asia/Seoul") } -getContentPress <- +news_content_press <- function(html_obj, press_node_info = "div.media_end_head_top a img", press_attr = "title") { @@ -140,7 +152,7 @@ getContentPress <- return(press[1]) } -getContentBody <- +news_content_body <- function(html_obj, body_node_info = "div#dic_area", body_attr = "") { @@ -153,7 +165,7 @@ getContentBody <- return(body) } -getOriginalUrl <- function(html_obj, +news_content_original_url <- function(html_obj, origin_url_node_info = "a.media_end_head_origin_link", origin_url_attr = "href") { node <- rvest::html_nodes(html_obj, origin_url_node_info) @@ -166,7 +178,7 @@ getOriginalUrl <- function(html_obj, } #' @importFrom httr2 url_parse -getSection <- function(turl) { +news_content_section <- function(turl) { if (is.null(httr2::url_parse(turl)$query$sid)) { return(NA) } diff --git a/R/getMaxPageNum.R b/R/getMaxPageNum.R index c1d94f4..28965ea 100644 --- a/R/getMaxPageNum.R +++ b/R/getMaxPageNum.R @@ -11,8 +11,10 @@ #' \dontrun{ #' getMaxPageNum("https://news.naver.com/main/list.naver?mode=LS2D&mid=shm&sid1=103&sid2=376") #' } - getMaxPageNum <- function(turl, max = 100) { + news_max_page_num(turl, max) +} +news_max_page_num 
<- function(turl, max = 100) { httr2::request(turl) %>% httr2::req_url_query(page = max) %>% httr2::req_method("GET") %>% diff --git a/R/getUrlList.R b/R/getUrlList.R index 039ae0a..ef43ff6 100644 --- a/R/getUrlList.R +++ b/R/getUrlList.R @@ -13,7 +13,12 @@ #' getUrlList("https://news.naver.com/main/list.naver?mode=LS2D&mid=shm&sid1=103&sid2=376") #' } -getUrlList <- +getUrlList <- function(turl, + col = c("titles", "links")) { + news_urls_from_list(turl, col) +} + +news_urls_from_list <- function(turl, col = c("titles", "links")) { diff --git a/R/sysdata.rda b/R/sysdata.rda index b23c548..be387dc 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/news_category.R b/data-raw/news_category.R deleted file mode 100644 index dd6e275..0000000 --- a/data-raw/news_category.R +++ /dev/null @@ -1,6 +0,0 @@ -## code to prepare `news_category` dataset goes here -library(N2H4) - -news_category <- getCategory(fresh = TRUE) - -usethis::use_data(news_category, overwrite = TRUE, internal = TRUE) diff --git a/data-raw/news_category_data.R b/data-raw/news_category_data.R new file mode 100644 index 0000000..332f8ff --- /dev/null +++ b/data-raw/news_category_data.R @@ -0,0 +1,6 @@ +## code to prepare `news_category_data` dataset goes here +library(N2H4) + +news_category_data <- getCategory(fresh = TRUE) + +usethis::use_data(news_category_data, overwrite = TRUE, internal = TRUE) diff --git a/man/getContent.Rd b/man/getContent.Rd index 4fc262f..5f0dca9 100644 --- a/man/getContent.Rd +++ b/man/getContent.Rd @@ -23,6 +23,6 @@ Get naver news content from links. 
} \examples{ \dontrun{ - getContent("https://n.news.naver.com/mnews/article/214/0001195110?sid=103") + getContent("https://n.news.naver.com/mnews/article/214/0001195110?sid=103") } } diff --git a/tests/testthat/test-func.R b/tests/testthat/test-func.R index 889f786..3284ca4 100644 --- a/tests/testthat/test-func.R +++ b/tests/testthat/test-func.R @@ -75,7 +75,7 @@ test_that("passSportsnews", { test_that("getCategory", { test <- getCategory() - expect_equal(test, news_category) + expect_equal(test, news_category_data) }) test_that("getCategoryFresh", {