Update politicians contact list
bot committed Jul 15, 2024
1 parent 35f682d commit b43eebc
Showing 6 changed files with 488 additions and 476 deletions.
892 changes: 447 additions & 445 deletions congress.csv

Large diffs are not rendered by default.
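The bulk of the commit is the regenerated congress.csv (447 additions, 445 deletions), which the viewer does not render. As a quick way to eyeball the refreshed data locally, here is a minimal sketch; it assumes only that congress.csv sits at the repository root, as the file list suggests, and makes no assumptions about its column layout:

import csv

# Print the first few rows of the regenerated contact list.
with open("congress.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        print(row)
        if i >= 4:  # the header plus a handful of records is enough for a spot check
            break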

Binary file modified scripts/__pycache__/__main__.cpython-38.pyc
Binary file modified scripts/__pycache__/congress_crawler.cpython-38.pyc
Binary file modified scripts/__pycache__/senate_crawler.cpython-38.pyc
54 changes: 32 additions & 22 deletions scripts/congress_crawler.py
@@ -13,8 +13,7 @@
 import asyncio
 from urllib.parse import urljoin
 
-logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
-                    level=logging.INFO)
+logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
 
 
 def get_html(url):
@@ -30,8 +29,8 @@ def __init__(self):
         self.base_url = "https://www.camara.leg.br/"
         self.congress = []
         self.search_url = (
-            self.base_url +
-            "deputados/quem-sao/resultado?search=&partido=&uf=&sexo=")
+            self.base_url + "deputados/quem-sao/resultado?search=&partido=&uf=&sexo="
+        )
 
     async def run(self):
         """Start the Congress crawler."""
@@ -41,30 +40,37 @@ async def run(self):
             if int(total) < 1:
                 logging.error(
                     "The latest legislature"
-                    "'s quorum for the Congress have not been informed yet")
+                    "'s quorum for the Congress have not been informed yet"
+                )
 
             pages = round(int(total) / 25) + 1
             tasks = []
             for i in range(1, pages):
                 tasks.append(
                     asyncio.create_task(
-                        self.get_congress_by_page(self.search_url +
-                                                  "&legislatura=" +
-                                                  legislature + "&pagina=" +
-                                                  str(i))))
+                        self.get_congress_by_page(
+                            self.search_url
+                            + "&legislatura="
+                            + legislature
+                            + "&pagina="
+                            + str(i)
+                        )
+                    )
+                )
             await asyncio.gather(*tasks)
 
         except Exception:
             logging.exception("global failure")
             requests.post(
                 os.environ.get("SLACK_WEBHOOK_URL"),
-                json.dumps({
-                    "channel": "#notifications",
-                    "icon_emoji": ":fire:",
-                    "text":
-                    ":warning: Brazilian congress crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
-                    "username": "politicians-contacts",
-                }),
+                json.dumps(
+                    {
+                        "channel": "#notifications",
+                        "icon_emoji": ":fire:",
+                        "text": ":warning: Brazilian congress crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
+                        "username": "politicians-contacts",
+                    }
+                ),
                 headers={"Content-Type": "application/json"},
             )
 
@@ -105,7 +111,10 @@ async def get_congress_by_page(self, url):
             tasks.append(
                 asyncio.create_task(
                     self.get_congress_person_data(
-                        urljoin(self.base_url, link.get("href")))))
+                        urljoin(self.base_url, link.get("href"))
+                    )
+                )
+            )
 
         await asyncio.gather(*tasks)
 
@@ -119,16 +128,17 @@ async def get_congress_person_data(self, url):
         congressperson = self.get_congress_person_data_from_page(url)
         if congressperson:
             self.congress.append(congressperson)
-            logging.info(f"congressperson: {url} - "
-                         f' email: {congressperson["email"]} -'
-                         f' party: {congressperson["party"]}')
+            logging.info(
+                f"congressperson: {url} - "
+                f' email: {congressperson["email"]} -'
+                f' party: {congressperson["party"]}'
+            )
 
     def get_congress_person_data_from_page(self, url):
         try:
             soup = BeautifulSoup(get_html(url), "html.parser")
             name = soup.find(id="nomedeputado").contents[0]
-            party_state = soup.find(
-                class_="informacoes-deputado__inline").contents[1]
+            party_state = soup.find(class_="informacoes-deputado__inline").contents[1]
             party = re.findall(r".+?(?=\s-)", party_state)[0]
             email = soup.find(class_="email").contents[0]
 
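For context, the two pieces of crawler logic touched by the reformatting above are the pagination URL construction and the party/state regex. A standalone sketch with illustrative values (total, legislature, and party_state are made up here; the real crawler reads them from the crawled pages):

import re

base_url = "https://www.camara.leg.br/"
search_url = base_url + "deputados/quem-sao/resultado?search=&partido=&uf=&sexo="

# Pagination: the site lists 25 deputies per page, hence total / 25 pages.
total = 513         # illustrative quorum; the crawler scrapes this value
legislature = "57"  # illustrative legislature id
pages = round(total / 25) + 1
page_urls = [
    search_url + "&legislatura=" + legislature + "&pagina=" + str(i)
    for i in range(1, pages)
]

# Party extraction: non-greedy match up to the first " -" separator.
party_state = "PT - SP"  # illustrative; parsed from the informacoes-deputado__inline element
party = re.findall(r".+?(?=\s-)", party_state)[0]

print(len(page_urls), page_urls[0], party)

Running it prints 21 page URLs and the party "PT", mirroring the inputs that get_congress_by_page and get_congress_person_data_from_page operate on.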
18 changes: 9 additions & 9 deletions scripts/senate_crawler.py
@@ -5,8 +5,7 @@
 import requests
 import os
 
-logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
-                    level=logging.INFO)
+logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)
 
 
 def get_html(url):
@@ -46,13 +45,14 @@ async def run(self):
             logging.exception("global failure")
             requests.post(
                 os.environ.get("SLACK_WEBHOOK_URL"),
-                json.dumps({
-                    "channel": "#notifications",
-                    "icon_emoji": ":fire:",
-                    "text":
-                    ":warning: Brazilian senate crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
-                    "username": "politicians-contacts",
-                }),
+                json.dumps(
+                    {
+                        "channel": "#notifications",
+                        "icon_emoji": ":fire:",
+                        "text": ":warning: Brazilian senate crawler <https:/7onn/politicians-contacts/actions|failed>! :fire:",
+                        "username": "politicians-contacts",
+                    }
+                ),
                 headers={"Content-Type": "application/json"},
             )
         finally:
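Both crawlers share the failure path reformatted above: any exception is logged and a Slack-style JSON payload is POSTed to a webhook whose URL comes from the SLACK_WEBHOOK_URL environment variable. A minimal self-contained sketch of that pattern, with an illustrative message text (it requires SLACK_WEBHOOK_URL to be set, exactly as the crawlers do):

import json
import logging
import os

import requests

logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO)


def notify_failure(text):
    # POST a Slack-compatible payload to the incoming webhook.
    requests.post(
        os.environ.get("SLACK_WEBHOOK_URL"),  # must be set, or requests raises
        json.dumps(
            {
                "channel": "#notifications",
                "icon_emoji": ":fire:",
                "text": text,
                "username": "politicians-contacts",
            }
        ),
        headers={"Content-Type": "application/json"},
    )


try:
    raise RuntimeError("simulated crawl failure")  # illustrative trigger only
except Exception:
    logging.exception("global failure")
    notify_failure(":warning: crawler failed! :fire:")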
