Simple Python Recursive Web Scraper

I tried to make a simple recursive web scraper in Python. My idea was to grab all the links, titles, and tag names.

Website: https://lifebridgecapital.com/podcast/

Course of Action:

Grab all the tag links from the Website.

tag_words_links(Website) --> (https://lifebridgecapital.com/tag/multifamily/)(2)
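Concretely, that step just collects the href of every a.tag-cloud-link element on the page. A minimal sketch of the idea, assuming a requests_html HTMLSession named session:

from requests_html import HTMLSession

session = HTMLSession()
r = session.get('https://lifebridgecapital.com/podcast/')
# every tag in the tag cloud is an <a class="tag-cloud-link"> with an href
tag_links = [a.attrs['href'] for a in r.html.find('a.tag-cloud-link')]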

My script then fetches the links, tag names, and titles from each of the pages that tag_words_links returned. Some of those pages are paginated and some aren't, so I used an if condition to catch the pages whose HTML contains class="page-numbers".
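In isolation, that pagination check looks like this (a sketch; r stands for the requests_html response of one tag page):

# a tag archive is paginated when its HTML contains page-number links
if 'class="page-numbers"' in r.text:
    next_pages = {a.attrs['href'] for a in r.html.find('a.page-numbers')}
    # each URL in next_pages is then fetched and scraped the same way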

Looking at the code, it's clear there is a lot of repetition going on, so I'd like to keep it DRY; one direction I've been considering is sketched after the code below. Any suggestions and ideas are much appreciated.

Here is the code:

from requests_html import HTMLSession
import csv
import time


def tag_words_links(url):
    # Collect the href of every tag-cloud link on the podcast page.
    _request = _session.get(url)
    tags = _request.html.find('a.tag-cloud-link')
    links = []
    for link in tags:
        links.append(link.attrs['href'])

    return links

def parse_tag_links(link):
    # Scrape titles, tag names, and article links from one tag archive page.
    _request = _session.get(link)
    article_links = _request.html.find('h3 a')
    tag_names = [tag.text for tag in _request.html.find('div.infinite-page-caption')]
    articles = [article.attrs['href'] for article in article_links]
    titles = [title.text for title in _request.html.find('h3.gdlr-core-blog-title')]
    # Some tag archives are paginated; fetch and scrape every numbered page too.
    if 'class="page-numbers"' in _request.text:
        next_pages = {page.attrs['href'] for page in _request.html.find('a.page-numbers')}
        for page in next_pages:
            next_page_request = _session.get(page)
            articles.extend(a.attrs['href'] for a in next_page_request.html.find('h3 a'))
            titles.extend(t.text for t in next_page_request.html.find('h3.gdlr-core-blog-title'))
            tag_names.extend(t.text for t in next_page_request.html.find('div.infinite-page-caption'))

    scraped_data = {
        'Title': titles,
        'Tag_Name': tag_names,
        'Link': articles
    }

    return scraped_data


if __name__ == '__main__':
    data = []
    _session = HTMLSession()
    url = 'https://lifebridgecapital.com/podcast/'
    links = tag_words_links(url)
    for link in links:
        data.append(parse_tag_links(link))
        time.sleep(2)  # be polite between requests

    with open('life-bridge-capital-tags.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=data[0].keys())
        writer.writeheader()
        for row in data:
            writer.writerow(row)
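
One direction I've been considering for the repetition is pulling the per-page extraction into a single helper and calling it for the first page and for every paginated page alike. A rough sketch reusing my selectors (scrape_page is a hypothetical name, not part of the script above):

# hypothetical helper: extract one page's titles, tag names, and article links
def scrape_page(response):
    return {
        'Title': [t.text for t in response.html.find('h3.gdlr-core-blog-title')],
        'Tag_Name': [t.text for t in response.html.find('div.infinite-page-caption')],
        'Link': [a.attrs['href'] for a in response.html.find('h3 a')],
    }

With something like that, parse_tag_links would only be responsible for fetching pages and merging the per-page results. Is that the right way to go, or is there a better pattern?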