source: other-projects/metadata-encoding/py/using-mongodb/graphSampleError.py@ 38773

Last change on this file since 38773 was 38773, checked in by jc550, 4 months ago

last batch of python to move

  • Property svn:executable set to *
File size: 1.6 KB
Line 
1#!/usr/bin/env python3
2
3import pymongo
4from matplotlib import pyplot as plt
5from collections import defaultdict
6import alive_progress
7
# Plot, per publication year, the percentage of titles in the "crossref"
# collection that contain the literal text "&amp" (a leftover HTML entity).

AMP_MARKER = "&amp"

# Inclusive range of years shown on the x-axis.
year_range_min = 1980
year_range_max = 2023


def _issued_year(document):
    """Return the first element of the first "issued" entry, or None.

    None is returned when the document has no "issued" field, when that
    field is empty, or when its first entry is empty.
    """
    issued = document.get("issued")
    if not issued:
        return None
    first_entry = issued[0]
    if not first_entry:
        return None
    return first_entry[0]


def _title_has_amp(title):
    """Return True if *title* contains the "&amp" marker.

    A plain string is searched directly.  A list/tuple is searched element
    by element -- NOTE(review): this assumes titles may be stored as a list
    of strings; confirm against the collection.  Any other value (including
    None) counts as "no marker".
    """
    if isinstance(title, str):
        return AMP_MARKER in title
    if isinstance(title, (list, tuple)):
        return any(
            isinstance(part, str) and AMP_MARKER in part for part in title
        )
    return False


def count_titles_by_year(documents):
    """Tally documents per year and how many of them have "&amp" in the title.

    Args:
        documents: iterable of mappings with optional "title" and "issued"
            keys (for example a pymongo cursor).

    Returns:
        A ``(year_count, amp_count)`` pair of dicts keyed by ``str(year)``.
        Documents without a usable year are skipped.
    """
    year_count = defaultdict(int)
    amp_count = defaultdict(int)

    for document in documents:
        issued_year = _issued_year(document)
        if issued_year is None:
            continue
        # str() rather than repr(): the plotting step looks years up with
        # str(), and the two only agree when the year is an int.
        year_key = str(issued_year)
        year_count[year_key] += 1
        if _title_has_amp(document.get("title")):
            amp_count[year_key] += 1

    return year_count, amp_count


def percentage_by_year(year_count, amp_count):
    """Return ``{year: percentage of that year's titles containing "&amp"}``.

    Years whose document count is zero map to 0.
    """
    return {
        year: (amp_count.get(year, 0) / total) * 100 if total > 0 else 0
        for year, total in year_count.items()
    }


def series_for_range(percentage_data, first_year, last_year):
    """Return ``(years, percentages)`` for every year in the inclusive range.

    Years with no entry in *percentage_data* are reported as 0 instead of
    raising KeyError, so gaps in the data do not abort the plot.
    """
    years = list(range(first_year, last_year + 1))
    percentages = [percentage_data.get(str(year), 0) for year in years]
    return years, percentages


def main():
    """Read the collection, print the per-year percentages and plot them."""
    client = pymongo.MongoClient("mongodb://localhost")
    try:
        collection = client["alpss"]["crossref"]
        # Only the two fields that are actually inspected are fetched.
        cursor = collection.find({}, {"title": 1, "issued": 1})
        year_count, amp_count = count_titles_by_year(cursor)
    finally:
        # Release the connection even if iteration over the cursor fails.
        client.close()

    percentage_data = percentage_by_year(year_count, amp_count)
    years, percentages = series_for_range(
        percentage_data, year_range_min, year_range_max
    )

    print(percentage_data)

    # Create a time-series graph using matplotlib
    plt.plot(years, percentages, marker='o')
    plt.xlabel('Year')
    plt.ylabel('Percentage of Titles with "&amp"')
    plt.title('Time Series of Percentage of Titles with "&amp" by Year')
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.