main

View Source

 1from bs4 import BeautifulSoup
 2import requests
 3from job import Job
 4import db as db
 5
 6
# URL containing pre-filtered job search results
# for "software engineer intern/entry level" in the last 7 days.
# Visible in the query string: keywords "software engineer intern",
# location "United States", and f_TPR=r604800 (604800 seconds = 7 days).
# NOTE(review): f_E=1%2C2 and f_JT=F%2CI look like the experience-level and
# job-type filters, and currentJobId like a leftover from the browser
# session the URL was copied from; confirm against LinkedIn before editing.
LINKEDIN_URL = "https://www.linkedin.com/jobs/search/?currentJobId=3511050778&f_E=1%2C2&f_JT=F%2CI&f_TPR=r604800&geoId=103644278&keywords=software%20engineer%20intern&location=United%20States&refresh=true"
10
11
def scrape_postings() -> list[Job]:
    """Scrape the LinkedIn search results page into Job objects.

    Downloads ``LINKEDIN_URL``, parses it with BeautifulSoup and builds one
    ``Job`` for each ``div.job-search-card`` element found in the page.

    Cards that lack any of the expected elements (title, company link,
    location, posting time or job link) are skipped, so a single malformed
    card no longer aborts the whole scrape with an ``AttributeError``.

    Returns:
        The jobs found on the page, in page order. The list is empty when
        the page contains no complete job cards.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # requests has no default timeout; without one a stalled connection
    # would block this call forever.
    response = requests.get(LINKEDIN_URL, timeout=30)
    # Fail loudly on 4xx/5xx rather than parsing an error page as results.
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes so it can detect the encoding itself.
    soup: BeautifulSoup = BeautifulSoup(response.content, "html.parser")

    jobs: list[Job] = []

    # Each job posting is rendered as a div with the "job-search-card" class.
    for post in soup.find_all(name="div", class_="job-search-card"):
        title_tag = post.find(name="h3", class_="base-search-card__title")
        company_tag = post.find(name="a", class_="hidden-nested-link")
        location_tag = post.find(
            name="span", class_="job-search-card__location"
        )
        date_tag = post.find(name="time")
        link_tag = post.find(name="a", class_="base-card__full-link")

        # find() returns None when nothing matches; skip incomplete cards.
        required = (title_tag, company_tag, location_tag, date_tag, link_tag)
        if any(tag is None for tag in required):
            continue

        # An anchor without an href would otherwise be stored as "None".
        href = link_tag.get("href")
        if href is None:
            continue

        jobs.append(
            Job(
                title_tag.text.strip(),
                company_tag.text.strip(),
                location_tag.text.strip(),
                str(href).strip(),
                date_tag.text.strip(),
            )
        )

    return jobs
45
46
def main():
    """Scrape the current job postings and fail if none were found.

    Raises:
        AssertionError: If the scrape returns no postings.
    """

    new_postings: list[Job] = scrape_postings()

    # Raised explicitly instead of using ``assert`` so the check still runs
    # when Python is started with -O, which strips assert statements. The
    # exception type and message are unchanged.
    if not new_postings:
        raise AssertionError("No new postings found")
53
54
# Script entry point: runs only when the file is executed directly, not when
# it is imported.
if __name__ == "__main__":
    # Initialise the database before scraping.
    # NOTE(review): presumably creates the storage used for postings; confirm
    # in the db module.
    db.initialize_database()
    main()

def scrape_postings() -> list[job.Job]: View Source

def scrape_postings() -> list[Job]:
    """Scrape the LinkedIn search results page into Job objects.

    Downloads ``LINKEDIN_URL``, parses it with BeautifulSoup and builds one
    ``Job`` for each ``div.job-search-card`` element found in the page.

    Cards that lack any of the expected elements (title, company link,
    location, posting time or job link) are skipped, so a single malformed
    card no longer aborts the whole scrape with an ``AttributeError``.

    Returns:
        The jobs found on the page, in page order. The list is empty when
        the page contains no complete job cards.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # requests has no default timeout; without one a stalled connection
    # would block this call forever.
    response = requests.get(LINKEDIN_URL, timeout=30)
    # Fail loudly on 4xx/5xx rather than parsing an error page as results.
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes so it can detect the encoding itself.
    soup: BeautifulSoup = BeautifulSoup(response.content, "html.parser")

    jobs: list[Job] = []

    # Each job posting is rendered as a div with the "job-search-card" class.
    for post in soup.find_all(name="div", class_="job-search-card"):
        title_tag = post.find(name="h3", class_="base-search-card__title")
        company_tag = post.find(name="a", class_="hidden-nested-link")
        location_tag = post.find(
            name="span", class_="job-search-card__location"
        )
        date_tag = post.find(name="time")
        link_tag = post.find(name="a", class_="base-card__full-link")

        # find() returns None when nothing matches; skip incomplete cards.
        required = (title_tag, company_tag, location_tag, date_tag, link_tag)
        if any(tag is None for tag in required):
            continue

        # An anchor without an href would otherwise be stored as "None".
        href = link_tag.get("href")
        if href is None:
            continue

        jobs.append(
            Job(
                title_tag.text.strip(),
                company_tag.text.strip(),
                location_tag.text.strip(),
                str(href).strip(),
                date_tag.text.strip(),
            )
        )

    return jobs

Scrapes job postings from LinkedIn and returns a list of Job objects

def main(): View Source

def main():
    """Scrape the current job postings and fail if none were found.

    Raises:
        AssertionError: If the scrape returns no postings.
    """

    new_postings: list[Job] = scrape_postings()

    # Raised explicitly instead of using ``assert`` so the check still runs
    # when Python is started with -O, which strips assert statements. The
    # exception type and message are unchanged.
    if not new_postings:
        raise AssertionError("No new postings found")

Main function