"""Print the course names found in ``index.html``, grouped by category."""
import re

# Markup patterns; each captures the text between the opening and closing tag.
section_pattern = '<section class="main_section">(.*?)</section>'
category_pattern = '<h1>(.*?)</h1>'
course_pattern = '<span class="course_name">(.*?)</span>'


def _clean(text):
    """Collapse every whitespace run in *text* to one space and trim the ends."""
    return ' '.join(text.split())


def parse_courses(html):
    """Extract the courses of every main section in *html*.

    Args:
        html: The document markup as a string.

    Returns:
        A list with one dict per ``main_section`` element, in document
        order. Each dict has a ``'category'`` key (the text of the first
        ``<h1>`` in the section, or ``''`` when the section has none) and a
        ``'course_s'`` key (the list of ``course_name`` span texts).
    """
    data_s = []
    # DOTALL lets ``.`` cross line breaks, so elements spread over several
    # lines still match. Deleting characters from the document up front is
    # not an option: the patterns themselves contain spaces.
    for section in re.findall(section_pattern, html, re.DOTALL):
        heading = re.search(category_pattern, section, re.DOTALL)
        # A section without a heading is kept under an empty category
        # instead of aborting the whole run with an IndexError.
        category = _clean(heading.group(1)) if heading else ''
        course_s = [
            _clean(name)
            for name in re.findall(course_pattern, section, re.DOTALL)
        ]
        data_s.append(
            {
                'category': category,
                'course_s': course_s,
            }
        )
    return data_s


def main():
    """Read ``index.html`` and print each category followed by its courses."""
    with open('index.html', 'r', encoding='utf-8') as f:
        html = f.read()

    for data in parse_courses(html):
        print(data['category'])
        for course in data['course_s']:
            print(' ', course)


if __name__ == '__main__':
    main()