Python – Asynchronous Web Scraper

This is my first asyncio/aiohttp web scraper. I am trying to wrap my head around Python's asyncio and aiohttp libraries these days, and I am not sure yet whether I fully understand them, so I would like some constructive review and suggestions for improvement.

I’m scraping https://www.spoonflower.com/, which exposes some public APIs for design data and per-fabric-type pricing data. My challenge was to get the design name, creator name, and price of each design for each fabric type. The design name and creator name come from this endpoint:

https://pythias.spoonflower.com/search/v1/designs?lang=en&page_offset=0&sort=bestSelling&product=Fabric&forSale=true&showMatureContent=false&page_locale=en

and the pricing data for each fabric type comes from this endpoint:

https://api-gateway.spoonflower.com/alpenrose/pricing/fabrics/FABRIC_' + fab_type + '?quantity=1&shipping_country=PK&currency=EUR&measurement_system=METRIC&design_id=' + str(item['designId']) + '&page_locale=en

Each page has 84 items and there are 24 fabric types. I first get all the names of the fabric types and store them in a list so I can loop through it and change the URL dynamically; then I extract designName and screenName from the design page, and finally extract the price data.

Here is my code:

import asyncio
import aiohttp
import json
import requests
from bs4 import BeautifulSoup
from collections import OrderedDict


# First page of the design-search API (84 designs per page); page_offset
# could be varied to walk subsequent pages.
item_endpoint = 'https://pythias.spoonflower.com/search/v1/designs?lang=en&page_offset=0&sort=bestSelling&product=Fabric&forSale=true&showMatureContent=false&page_locale=en'

def get_fabric_names():
    """Scrape the fabric listing page and return the fabric-type codes.

    Returns:
        list[str]: upper-case, underscore-joined fabric names with the
        trademark sign stripped, remapped where the display name differs
        from the code the pricing API expects.
    """
    # Display-name fragments whose pricing-API code differs from the
    # scraped heading.
    renames = {
        'COTTON_LAWN_(BETA)': 'COTTON_LAWN_APPAREL',
        'COTTON_POPLIN': 'COTTON_POPLIN_BRAVA',
        'ORGANIC_COTTON_KNIT': 'ORGANIC_COTTON_KNIT_PRIMA',
        'PERFORMANCE_PIQUÉ': 'PERFORMANCE_PIQUE',
        'CYPRESS_COTTON': 'CYPRESS_COTTON_BRAVA',
    }

    def remap(name):
        # Substring match, like the original if/elif chain.
        for fragment, code in renames.items():
            if fragment in name:
                return code
        return name

    res = requests.get('https://www.spoonflower.com/spoonflower_fabrics')
    soup = BeautifulSoup(res.text, 'lxml')
    headings = (div.find('h2').text.strip()
                for div in soup.find_all('div', {'class': 'product_detail medium_text'}))
    # '\u2122' is the trademark sign (™); the original literal u"u2122"
    # was missing the backslash and never matched anything.  A list (not a
    # generator) is required so the result can be sliced/indexed by callers.
    names = ['_'.join(h.upper().replace('\u2122', '').split()) for h in headings]
    return [remap(name) for name in names]

async def fetch_design_endpoint(session, design_url):
    """Fetch the design-search endpoint and return its 'page_results' list.

    Args:
        session: an aiohttp.ClientSession (anything with an async ``get``).
        design_url: full URL of the search endpoint.

    Returns:
        list: the ``page_results`` array of design dicts from the JSON body.
    """
    async with session.get(design_url) as response:
        body = await response.text()
    # JSON objects are dicts and must be subscripted with [...]; the
    # original call syntax _json_object('page_results') raises TypeError.
    return json.loads(body)['page_results']

async def fetch_pricing_data(session, pricing_endpoint):
    """Fetch one fabric's pricing and merge it per (design, creator) pair.

    Args:
        session: an aiohttp.ClientSession.
        pricing_endpoint: pricing URL for one fabric type / design id.

    Returns:
        OrderedDict: keyed by ``(designName, screenName)``; each value maps
        ``fabric_name_NN`` / ``test_swatch_meter_NN`` / ``fat_quarter_meter_NN``
        / ``meter_NN`` to the scraped figures, where NN counts fabric types
        already recorded for that design.
    """
    async with session.get(pricing_endpoint) as response:
        payload = json.loads(await response.text())

    data = payload['data']
    pricing = data.get('pricing', {})

    def price_of(tier_name):
        # Missing price tiers become 'N/A'; .get() replaces the original
        # bare `except:` clauses, which also swallowed unrelated bugs.
        tier = pricing.get(tier_name)
        return tier['price'] if tier and 'price' in tier else 'N/A'

    items_dict = OrderedDict()
    for item in await fetch_design_endpoint(session, item_endpoint):
        key = (item['name'], item['user']['screenName'])
        entry = items_dict.setdefault(key, {})
        # Four fields are stored per fabric type, so len(entry) // 4 is the
        # next fabric index.  Integer division fixes the original float
        # suffix, and the original `return` inside the loop (which returned
        # dict.update()'s None after the first item) is moved out.
        item_count = len(entry) // 4
        entry.update({
            'fabric_name_%02d' % item_count: data['fabric_code'],
            'test_swatch_meter_%02d' % item_count: price_of('TEST_SWATCH_METER'),
            'fat_quarter_meter_%02d' % item_count: price_of('FAT_QUARTER_METER'),
            'meter_%02d' % item_count: price_of('METER'),
        })
    return items_dict

        

async def main():
    """Concurrently collect pricing for every design on the first page.

    Returns:
        list: one result per (design, fabric-type) pricing request, in
        scheduling order, as returned by fetch_pricing_data.
    """
    async with aiohttp.ClientSession() as session:
        fabric_types = get_fabric_names()
        design_page = await fetch_design_endpoint(session, item_endpoint)
        tasks = []  # the original `tasks = ()` tuple has no .append()
        for item in design_page:
            # The last three scraped fabric names are excluded, as in the
            # original fabric_type[0:-3] slice.
            for fab_type in fabric_types[:-3]:
                pricing_url = (
                    'https://api-gateway.spoonflower.com/alpenrose/pricing/fabrics/'
                    'FABRIC_' + fab_type + '?quantity=1&shipping_country=PK'
                    '&currency=EUR&measurement_system=METRIC&design_id='
                    + str(item['designId']) + '&page_locale=en'
                )
                # Schedule each request exactly once.  The original both
                # awaited fetch_pricing_data serially AND scheduled it as a
                # task, issuing every request twice and serializing the run.
                tasks.append(asyncio.create_task(
                    fetch_pricing_data(session, pricing_url)))
        return await asyncio.gather(*tasks)
if __name__ == '__main__':
    # Guard the entry point so importing this module doesn't kick off the
    # whole scrape as a side effect.
    results = asyncio.run(main())
    print(results)

Any ideas and suggestions are welcome to make this scraper more Pythonic and robust.