Merge pull request #1 from crossjam/feature-starred-entries
Implement starred and feed endpoints
crossjam authored Oct 15, 2023
2 parents aaec8e8 + 757db1a commit bcd0569
Showing 3 changed files with 127 additions and 46 deletions.
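In summary: both workflow files drop Python 3.7 from their test matrices, and feedbin_tools/cli.py gains a reusable paginated_request helper plus new starred and feed commands that emit line-delimited JSON.

One way to exercise the new commands end to end is Click's built-in test runner. This is a minimal sketch, assuming the Click group defined in the diff below is importable as feedbin_tools.cli:cli and that Feedbin credentials are already configured for the CLI; the feed id is made up.

# Minimal sketch using click.testing.CliRunner; the import path and the
# feed id 1234 are assumptions, not taken from this commit.
from click.testing import CliRunner

from feedbin_tools.cli import cli

runner = CliRunner()

# Up to five starred entries, one JSON object per line
result = runner.invoke(cli, ["starred", "--limit", "5"])
print(result.output)

# Only the starred entry ids
result = runner.invoke(cli, ["starred", "--ids", "--limit", "5"])
print(result.output)

# Entries for a hypothetical feed, including extended metadata
result = runner.invoke(cli, ["feed", "--extended", "--limit", "10", "1234"])
print(result.output)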
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
      matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
169 changes: 125 additions & 44 deletions feedbin_tools/cli.py
@@ -3,6 +3,7 @@
 import sys
 from itertools import islice
 from pprint import pformat
+from datetime import datetime, timezone
 
 
 import click
@@ -27,6 +28,63 @@ def batched(iterable, n):
         yield batch
 
 
+def paginated_request(request_url, auth=None, params={}):
+    logging.debug("requesting with potential pagination: %s", request_url)
+
+    session = requests_cache.CachedSession()
+
+    resp = session.get(
+        request_url,
+        auth=auth,
+        params=params,
+    )
+    resp.raise_for_status()
+
+    logging.debug("resp headers:\n %s", pformat(list(resp.headers.items())))
+
+    record_count = int(resp.headers.get("X-Feedbin-Record-Count", -1))
+    logging.info("Total records for url: %s", record_count)
+
+    items = resp.json()
+
+    logging.info("Total records in response: %s", len(items))
+
+    for item in items:
+        yield item
+
+    while "Links" in resp.headers:
+        # Requests will do the following automatically for the 'link' header
+        # Unfortunately the feedbin api uses the 'links' header
+        links = parse_header_links(resp.headers["links"])
+
+        resolved_links = {}
+        for link in links:
+            key = link.get("rel") or link.get("url")
+            resolved_links[key] = link
+
+        logging.info("Response pagination: %s", resolved_links)
+
+        if "next" not in resolved_links:
+            break
+
+        next_url = resolved_links["next"]["url"]
+
+        logging.info("Fetching next page: %s", next_url)
+        resp = session.get(
+            next_url,
+            auth=auth,
+            params=params,
+        )
+        resp.raise_for_status()
+
+        items = resp.json()
+
+        logging.info("Total records in response: %s", len(items))
+
+        for item in items:
+            yield item
+
+
 @click.group()
 @click.version_option()
 @click.option(
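A note on the helper above: requests only auto-parses the standard Link header (exposed as resp.links), so the Feedbin-specific Links header is parsed by hand with parse_header_links; response header lookups are case-insensitive, which is why the "Links" check works. A standalone sketch of the same follow-the-next-link pattern, assuming plain requests (no caching) and any endpoint that paginates this way:

# Illustrative only: follow_links is not part of this commit.
import requests
from requests.utils import parse_header_links


def follow_links(url, auth=None, params=None):
    """Yield items from every page, chasing rel="next" links by hand."""
    resp = requests.get(url, auth=auth, params=params)
    resp.raise_for_status()
    yield from resp.json()
    while "links" in resp.headers:  # header lookup is case-insensitive
        links = parse_header_links(resp.headers["links"])
        resolved = {link.get("rel") or link.get("url"): link for link in links}
        if "next" not in resolved:
            break
        resp = requests.get(resolved["next"]["url"], auth=auth, params=params)
        resp.raise_for_status()
        yield from resp.json()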
@@ -82,71 +140,83 @@ def subscriptions(ctx):
     resp = session.get("https://api.feedbin.com/v2/subscriptions.json", auth=auth)
     resp.raise_for_status()
 
-    json.dump(resp.json(), sys.stdout)
+    for item in resp.json():
+        sys.stdout.write(json.dumps(item) + "\n")
 
 
-def paginated_request(request_url, auth=None, params={}):
-    logging.debug("requesting with potential pagination: %s", request_url)
+@cli.command(name="starred")
+@click.option("-b", "--chunk-size", type=click.INT, default=75)
+@click.option("--extended/--no-extended", default=False)
+@click.option("--ids/--no-ids", default=False)
+@click.option("--limit", type=click.INT, default=-1)
+@click.pass_context
+def starred(ctx, chunk_size, extended, ids, limit):
+    "Fetch starred entries for the authed feedbin user and emit as JSON"
 
+    chunk_size = min(chunk_size, 100)
+    logging.info("Chunk size: %d", chunk_size)
+    auth = auth_from_context(ctx)
+    params = {"mode": "extended"} if extended else {}
+
     session = requests_cache.CachedSession()
+    resp = session.get("https://api.feedbin.com/v2/starred_entries.json", auth=auth)
 
-    resp = session.get(
-        request_url,
-        auth=auth,
-        params=params,
-    )
     resp.raise_for_status()
 
-    logging.debug("resp headers:\n %s", pformat(list(resp.headers.items())))
-
-    record_count = int(resp.headers.get("X-Feedbin-Record-Count", -1))
-    logging.info("Total records for url: %s", record_count)
-
-    items = resp.json()
-
-    logging.info("Total records in response: %s", len(items))
-
-    for item in items:
-        yield item
+    starred_ids = resp.json()
 
-    while "Links" in resp.headers:
-        # Requests will do the following automatically for the 'link' header
-        # Unfortunately the feedbin api uses the 'links' header
-        links = parse_header_links(resp.headers["links"])
+    logging.info("Starred entries id count: %d", len(starred_ids))
 
-        resolved_links = {}
-        for link in links:
-            key = link.get("rel") or link.get("url")
-            resolved_links[key] = link
+    clean_chunks = []
+    for i, chunk in enumerate(batched(starred_ids, chunk_size), 1):
+        clean_chunk = [v for v in chunk if v]
+        clean_chunks.append(clean_chunk)
+    logging.info("Processing %d chunks of size %d or less", i, chunk_size)
 
-        logging.info("Response pagination: %s", resolved_links)
+    if ids:
+        logging.info("Emitting starred item ids")
+        total_emitted = 0
+        for chunk in clean_chunks:
+            for v in chunk:
+                if 0 <= limit <= total_emitted:
+                    logging.info("Reached limit of %d, completed", limit)
+                    return
 
-        if "next" not in resolved_links:
-            break
+                sys.stdout.write(str(v) + "\n")
+                total_emitted += 1
+    else:
+        logging.info("Emitting starred items for %d chunks", len(clean_chunks))
+        total_emitted = 0
 
-        next_url = resolved_links["next"]["url"]
+        for i, chunk in enumerate(clean_chunks, 1):
+            params["ids"] = ",".join([str(v) for v in chunk])
 
-        logging.info("Fetching next page: %s", next_url)
-        resp = session.get(
-            next_url,
-            auth=auth,
-            params=params,
-        )
-        resp.raise_for_status()
+            logging.info("Fetching entries for chunk %d", i)
+            logging.debug("ids=%s", params["ids"])
+            resp = session.get(
+                "https://api.feedbin.com/v2/entries.json", auth=auth, params=params
+            )
+            resp.raise_for_status()
 
-        items = resp.json()
+            for item in resp.json():
+                if 0 <= limit <= total_emitted:
+                    logging.info("Reached limit of %d, completed", limit)
+                    return
 
-        logging.info("Total records in response: %s", len(items))
+                current_utc = datetime.now(timezone.utc)
+                iso_format = current_utc.isoformat()
+                item["x-retrieved-at"] = iso_format
 
-        for item in items:
-            yield item
+                sys.stdout.write(json.dumps(item) + "\n")
+                total_emitted += 1
 
 
 @cli.command(name="feed")
 @click.option("--extended/--no-extended", default=False)
+@click.option("--limit", type=click.INT, default=-1)
 @click.argument("feed_id")
 @click.pass_context
-def feed(ctx, feed_id, extended):
+def feed(ctx, feed_id, extended, limit):
     """
     Fetch entries for feedbin feed FEED_ID and emit as JSON
     """
@@ -159,5 +229,16 @@ def feed(ctx, feed_id, extended):
 
     logging.info("Request params: %s", params)
 
-    for item in paginated_request(entries_url, auth=None, params=params):
+    total_emitted = 0
+    for item in paginated_request(entries_url, auth=auth, params=params):
+        if 0 <= limit <= total_emitted:
+            logging.info("Reached limit of %d, completed", limit)
+            return
+
+        current_utc = datetime.now(timezone.utc)
+        iso_format = current_utc.isoformat()
+
+        item["x-retrieved-at"] = iso_format
+
         sys.stdout.write(json.dumps(item) + "\n")
+        total_emitted += 1
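The starred command above batches ids through the batched helper (only its tail is visible in the second hunk) so each entries.json request stays under the per-request id cap that the min(chunk_size, 100) clamp implies. A sketch of one way to write such a helper with islice, consistent with the itertools import at the top of the module; the exact implementation in feedbin_tools is not shown in this diff, and Python 3.12+ ships this as itertools.batched:

# Illustrative islice-based batching helper; the ids below are made up.
from itertools import islice


def batched(iterable, n):
    """Yield successive lists of at most n items from iterable."""
    it = iter(iterable)
    while batch := list(islice(it, n)):
        yield batch


starred_ids = [101, 102, 103, 104, 105]
for chunk in batched(starred_ids, 2):
    print(",".join(str(v) for v in chunk))  # 101,102 then 103,104 then 105

The walrus operator here requires Python 3.8, in line with the workflow changes above dropping 3.7.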
