Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save localStorage and sessionStorage data from browser #2214

Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 45 additions & 26 deletions lncrawl/core/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def __init__(
timeout: Optional[int] = 120,
options: Optional["ChromeOptions"] = None,
cookie_store: Optional[RequestsCookieJar] = None,
browser_storage: Optional[dict] = None,
soup_maker: Optional[SoupMaker] = None,
) -> None:
"""
Expand All @@ -31,13 +32,15 @@ def __init__(
- timeout (Optional[int], optional): Maximum wait duration in seconds for an element to be available. Default: 120.
- options (Optional["ChromeOptions"], optional): Webdriver options. Default: None.
- cookie_store (Optional[RequestsCookieJar], optional): A cookie store to synchronize cookies. Default: None.
- browser_storage (Optional[dict], optional): A dict that mirrors the browser's web storage under the keys "localStorage" and "sessionStorage", so the data can be applied to and read back from the webdriver. Default: None.
- soup_maker (Optional[SoupMaker], optional): Soup maker used to parse page content; a new SoupMaker is created when None. Default: None.
"""
self._driver: WebDriver = None
self.options = options
self.timeout = timeout
self.headless = headless
self.cookie_store = cookie_store
self.browser_storage = browser_storage
self.soup_maker = soup_maker or SoupMaker()

def __del__(self):
Expand Down Expand Up @@ -72,36 +75,52 @@ def _init_browser(self):
def _apply_cookies(self):
    """Copy saved cookies and web-storage entries into the running browser.

    Does nothing when no driver has been started. Cookies are taken from
    ``self.cookie_store`` when it is a ``RequestsCookieJar``. Storage entries
    are taken from ``self.browser_storage`` when it is a ``dict``, using the
    keys ``"localStorage"`` and ``"sessionStorage"``. A missing or empty key
    is treated as "nothing to apply", so a fresh ``{}`` store that has not
    been filled yet does not raise ``KeyError``.
    """
    driver = self._driver
    if not driver:
        return

    if isinstance(self.cookie_store, RequestsCookieJar):
        for cookie in self.cookie_store:
            driver.add_cookie(
                {
                    "name": cookie.name,
                    "value": cookie.value,
                    "path": cookie.path,
                    "domain": cookie.domain,
                    "secure": cookie.secure,
                    "expiry": cookie.expires,
                }
            )
        logger.debug("Cookies applied: %s", driver.get_cookies())

    if isinstance(self.browser_storage, dict):
        for area in ("localStorage", "sessionStorage"):
            # ``or {}`` covers both an absent key and a stored ``None``.
            entries = self.browser_storage.get(area) or {}
            # Key and value are passed as script arguments rather than
            # spliced into the JavaScript source, so no quoting is needed.
            script = "window.%s.setItem(arguments[0], arguments[1]);" % area
            for key, value in entries.items():
                driver.execute_script(script, key, value)
        logger.debug("Storage applied: %s", self.browser_storage)

def _restore_cookies(self):
    """Read cookies and web-storage contents from the browser into the stores.

    Does nothing when no driver has been started. When ``self.cookie_store``
    is a ``RequestsCookieJar``, every cookie reported by the driver is set on
    it. When ``self.browser_storage`` is a ``dict``, its ``"localStorage"``
    and ``"sessionStorage"`` entries are replaced with the key/value objects
    returned by the page's corresponding storage areas.
    """
    driver = self._driver
    if not driver:
        return

    jar = self.cookie_store
    if isinstance(jar, RequestsCookieJar):
        for entry in driver.get_cookies():
            jar.set(
                name=entry.get("name"),
                value=entry.get("value"),
                path=entry.get("path"),
                domain=entry.get("domain"),
                secure=entry.get("secure"),
                expires=entry.get("expiry"),
            )
        logger.debug("Cookies retrieved: %s", jar)

    storage = self.browser_storage
    if isinstance(storage, dict):
        # One JavaScript template serves both storage areas; it walks the
        # area by index and returns a plain object of key -> value.
        dump_script = (
            "var ls = window.%s, items = {}; "
            "for (var i = 0, k; i < ls.length; ++i) "
            " items[k = ls.key(i)] = ls.getItem(k); "
            "return items; "
        )
        for area in ("localStorage", "sessionStorage"):
            storage[area] = driver.execute_script(dump_script % area)
        logger.debug("Storage retrieved: %s", storage)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea to store sessionStorage and localStorage along with the cookies. Thanks!

@property
def active(self):
Expand Down
Loading