Ich möchte die Daten, die ich von der Website scrape, in eine CSV-Datei schreiben. Mein erster Versuch war die Verwendung von Scrapy, aber ich konnte die Syntax nicht richtig verstehen. Jetzt, da ich es geschafft habe, BeautifulSoup zu verwenden, weiß ich einfach nicht, wie ich die Daten in eine CSV-Datei schreiben soll.

import requests
from bs4 import BeautifulSoup


URL = "https://www.practo.com/tests/glycosylated-haemoglobin-blood/p?city=delhi"


def _text_or_empty(element):
    """Return the text of a BeautifulSoup lookup result, or '' if it is None."""
    return element.text if element is not None else ''


# Download the page. The timeout keeps the script from hanging on a stalled
# connection, and raise_for_status() stops it from parsing an HTTP error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find(id='root-app')
if results is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise SystemExit("No element with id 'root-app' found in the downloaded page")

job_elems = results.find_all('div', class_='u-padrl--std')

# Example of the heading markup being targeted:
# <h1 class="o-font-size--24 u-font-bold u-marginb--std">HbA1c Test</h1>
for job_elem in job_elems:
    # NOTE: a class_ string containing several classes only matches when the
    # element's class attribute equals that exact string (same order).
    title_elem = job_elem.find('h1', class_='o-font-size--24 u-font-bold u-marginb--std')
    also_known = job_elem.find('span', class_="u-font-bold")
    cost = job_elem.find('div', class_="o-font-size--22 u-font-bold o-f-color--primary")
    what_test = job_elem.find('div', class_="c-markdown--unstyled")

    found = (title_elem, also_known, cost, what_test)
    # Not every 'u-padrl--std' container holds the test details; skip the
    # ones in which none of the four lookups matched.
    if all(elem is None for elem in found):
        continue

    # Print each field on its own line (empty line for a missing field),
    # followed by a blank separator line.
    for elem in found:
        print(_text_or_empty(elem))
    print()

# Print the first paragraph of every markdown section on the page.
# Class names seen on the accordion sections while exploring the markup:
#   c-tabsection__content c-pp__accordion-item__content active
#   c-tabsection c-pp__accordion-item u-br-rule u-padtb--std--half active
text_content = results.find_all('div', class_='c-markdown--unstyled')
for item in text_content:
    prep = item.find('p')
    if prep is None:
        # Section without a <p> child: nothing to print.
        continue
    print(prep.text)
    print('xxo')
0
Arju Aman 18 Aug. 2020 im 22:04

2 Antworten

Beste Antwort
import requests
from bs4 import BeautifulSoup
# import the csv module 
import csv

URL = "https://www.practo.com/tests/glycosylated-haemoglobin-blood/p?city=delhi"


def _text_or_empty(element):
    """Return the text of a BeautifulSoup lookup result, or '' if it is None."""
    return element.text if element is not None else ''


# Download the page. The timeout keeps the script from hanging on a stalled
# connection, and raise_for_status() stops it from parsing an HTTP error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find(id='root-app')
if results is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise SystemExit("No element with id 'root-app' found in the downloaded page")

job_elems = results.find_all('div', class_='u-padrl--std')

# Example of the heading markup being targeted:
# <h1 class="o-font-size--24 u-font-bold u-marginb--std">HbA1c Test</h1>
rows = []
fields = ['title_elem', 'also_known', 'cost', 'what_test']
filename = "myfile.csv"
for job_elem in job_elems:
    found = (
        job_elem.find('h1', class_='o-font-size--24 u-font-bold u-marginb--std'),
        job_elem.find('span', class_="u-font-bold"),
        job_elem.find('div', class_="o-font-size--22 u-font-bold o-f-color--primary"),
        job_elem.find('div', class_="c-markdown--unstyled"),
    )
    # Not every 'u-padrl--std' container holds the test details; skip the
    # ones in which none of the four lookups matched.
    if all(elem is None for elem in found):
        continue
    # Keep the values as str: csv.writer would write bytes objects as their
    # repr (b'...'). Encoding is handled once, when the file is opened.
    title_elem, also_known, cost, what_test = (_text_or_empty(elem) for elem in found)
    rows.append([title_elem, also_known, cost, what_test])

# newline='' lets the csv module control line endings (avoids blank rows on
# Windows); the explicit encoding makes non-ASCII text portable.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header row first, then one row per scraped container.
    csvwriter.writerow(fields)
    csvwriter.writerows(rows)

# Print the first paragraph of every markdown section on the page.
# Class names seen on the accordion sections while exploring the markup:
#   c-tabsection__content c-pp__accordion-item__content active
#   c-tabsection c-pp__accordion-item u-br-rule u-padtb--std--half active
text_content = results.find_all('div', class_='c-markdown--unstyled')
for item in text_content:
    prep = item.find('p')
    if prep is None:
        # Section without a <p> child: nothing to print.
        continue
    print(prep.text)
    print('xxo')
1
Seyi Daniel 18 Aug. 2020 im 20:07

Sie können die Bibliothek xlsxwriter verwenden.

import xlsxwriter

# Create a workbook object for "file.xlsx" and add a worksheet to it.
workbook = xlsxwriter.Workbook("file.xlsx")
worksheet = workbook.add_worksheet()

# NOTE(review): row, column and element are not defined in this snippet; they
# are placeholders for the cell position and the value to write. Presumably
# row/column are zero-based indices -- confirm against the XlsxWriter docs.
worksheet.write(row, column, element)

# Close the workbook when done; presumably this is the step that actually
# writes the file to disk -- confirm against the XlsxWriter docs.
workbook.close()
1
Fabio Spoto 18 Aug. 2020 im 19:22