From 54be49d3fa8ad54e0ce2a086dd67e6d8d034c85e Mon Sep 17 00:00:00 2001 From: joeyism Date: Sun, 15 Dec 2019 16:07:10 -0500 Subject: [PATCH] stripped and cleaned value --- README.rst | 59 +++++++++++++++++++++++++++++++++++++++++++++++ edgar/__init__.py | 2 +- edgar/xbrl.py | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0cf5d2a..07bf571 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,17 @@ To get all companies and find a specific one, run edgar = Edgar() possible_companies = edgar.find_company_name("Cisco System") +To get XBRL data, run + +.. code-block:: python + + from edgar import Company, XBRL, XBRLElement + + company = Company("Oracle Corp", "0001341439") + results = company.get_data_files_from_10K("EX-101.INS", isxml=True) + xbrl = XBRL(results[0]) + XBRLElement(xbrl.relevant_children_parsed[15]).to_dict() # returns a dictionary of name, value, and schemaRef + API --- @@ -80,6 +91,39 @@ Returns the HTML in the form of `lxml.html `_ * ownership: defaults to include. Options are include, exclude, only. * no_of_entries: defaults to 100. Returns the number of entries to be returned. Maximum is 100. +get_10Ks +"""""""" + +Returns the HTML in the form of `lxml.html `_ of the concatenation of all the documents in the 10-K + + +* **Input** + + * no_of_documents (default: 1): number of documents to be retrieved + +get_document_type_from_10K +"""""""""""""""""""""""""" + +Returns the HTML in the form of `lxml.html `_ of the document within the 10-K + + +* **Input** + + * document_type: The type of document you want, e.g. 10-K, EX-3.2 + * no_of_documents (default: 1): number of documents to be retrieved + +get_data_files_from_10K +""""""""""""""""""""""" + +Returns the HTML in the form of `lxml.html `_ of the data file within the 10-K + + +* **Input** + + * document_type: The type of document you want, e.g. 
EX-101.INS + * no_of_documents (default: 1): number of documents to be retrieved + * isxml (default: False): by default, parsing is not case sensitive and is done with ``html`` in ``lxml``. If this is True, then it is parsed with ``etree``, which is case sensitive + Edgar ^^^^^ @@ -119,3 +163,18 @@ Returns a list of strings, each string contains the body of the specified docume * tree: lxml.html form that is returned from Company.getAllFilings * no_of_documents: number of document returned. If it is 1, the returned result is just one string, instead of a list of strings. Defaults to 1. + +XBRL +^^^^ + +Parses data from XBRL + + +* ``relevant_children`` + + * get children that are not ``context`` + +* ``relevant_children_parsed`` + + * get children that are not ``context``\ , ``unit``\ , ``schemaRef`` + * cleans tags diff --git a/edgar/__init__.py b/edgar/__init__.py index 4df755d..abe30d8 100644 --- a/edgar/__init__.py +++ b/edgar/__init__.py @@ -5,7 +5,7 @@ from .company import Company from .xbrl import XBRL, XBRLElement -__version__ = "4.1.0" +__version__ = "4.1.1" modules = glob.glob(dirname(__file__)+"/*.py") __all__ = [ basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')] diff --git a/edgar/xbrl.py b/edgar/xbrl.py index 4fde28e..7704b21 100644 --- a/edgar/xbrl.py +++ b/edgar/xbrl.py @@ -83,7 +83,7 @@ def name(self): @property def value(self) -> str: - return self.child.text + return self.child.text.replace("\n", "").strip() def to_dict(self) -> Dict: return {