Using the Data
Practical examples of consuming PaperStack API data — building question banks, filters, and analytics.
Using the Data
This guide shows practical patterns for working with PaperStack data in your applications.
Fetching Papers
Full Paper vs Subject Extracts
You can fetch either the full paper (all subjects merged) or individual subject files:
import requests
headers = {"Authorization": "Bearer ps_xxxxx"}
base = "https://api.paperstack.qzz.io"
# Full paper — questions in global order (1-200)
paper = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers).json()
print(f"Total questions: {paper['total']}")
# Subject extract — questions renumbered 1..N
physics = requests.get(f"{base}/neet/2024/s1/physics.json", headers=headers).json()
print(f"Physics questions: {physics['total']}")
Shift Discovery
Always discover available shifts before querying:
# List years
years_resp = requests.get(f"{base}/neet").json()
print(f"Available years: {years_resp['years']}")
# List shifts for a year
shifts_resp = requests.get(f"{base}/neet/2024").json()
for shift in shifts_resp["shifts"]:
print(f"{shift['shift']}: {shift['total']} questions, checksum: {shift['checksum']}")
Filtering Questions
By Subject and Type
import json
data = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers).json()
mcqs = [q for q in data["questions"] if q["type"] == "mcq"]
nat_questions = [q for q in data["questions"] if q["type"] == "nat"]
print(f"MCQs: {len(mcqs)}, NATs: {len(nat_questions)}")
By Topic
from collections import Counter
topic_counts = Counter(q["topic"] for q in data["questions"])
for topic, count in topic_counts.most_common(10):
print(f"{topic}: {count} questions")
By Marks
# Find high-value questions
high_value = [q for q in data["questions"] if q["marks"] >= 4]
print(f"High-value questions: {len(high_value)}")
# Calculate total marks
total_marks = sum(q["marks"] for q in data["questions"])
print(f"Total possible marks: {total_marks}")
Building a Question Bank
Create a Random Practice Set
import random
def create_practice_set(questions, count=10):
    """Return a random practice set drawn from ``questions``.

    Up to ``count`` distinct questions are sampled (all of them when fewer
    are available) and each is reduced to the fields a practice view needs.

    Args:
        questions: Sequence of question dicts carrying ``id``, ``text``,
            ``type``, ``topic`` and ``marks`` keys; ``options`` is optional.
        count: Maximum number of questions to select.

    Returns:
        A list of dicts with the keys ``id``, ``question``, ``type``,
        ``options``, ``topic`` and ``marks``. ``options`` is ``None`` for a
        question that has no such field.
    """
    selected = random.sample(questions, min(count, len(questions)))
    return [
        {
            "id": q["id"],
            "question": q["text"],
            "type": q["type"],
            # Presumably non-MCQ questions (e.g. "nat") may lack options;
            # .get keeps one such question from aborting the whole set.
            "options": q.get("options"),
            "topic": q["topic"],
            "marks": q["marks"],
        }
        for q in selected
    ]
practice = create_practice_set(data["questions"], 10)
print(json.dumps(practice, indent=2))
Topic-Focused Practice
def get_questions_by_topic(questions, topic):
    """Collect, in their original order, the questions whose "topic" equals ``topic``."""
    matches = []
    for item in questions:
        if item["topic"] == topic:
            matches.append(item)
    return matches
# Practice specific topics
kinematics = get_questions_by_topic(data["questions"], "kinematics")
print(f"Found {len(kinematics)} kinematics questions")
Working with Diagrams
Download All Diagrams
import os
def download_diagrams(questions, headers, base, exam, year, shift):
    """Download every diagram referenced by ``questions`` into ``diagrams/``.

    Each diagram is fetched from ``{base}/{exam}/{year}/{shift}/{file}`` and
    saved as ``diagrams/{question id}_{file basename}``. The directory is
    created if it does not exist.

    Args:
        questions: Iterable of question dicts; a question may carry a
            ``diagrams`` list whose entries each have a ``file`` key.
        headers: HTTP headers sent with every request.
        base: API base URL.
        exam: Exam path segment.
        year: Year path segment.
        shift: Shift path segment.

    Raises:
        requests.HTTPError: If a diagram request returns a 4xx/5xx status.
    """
    os.makedirs("diagrams", exist_ok=True)
    for q in questions:
        # "diagrams" is optional; a missing or empty value means nothing to fetch.
        for diagram in q.get("diagrams") or []:
            file = diagram["file"]
            resp = requests.get(
                f"{base}/{exam}/{year}/{shift}/{file}",
                headers=headers,
            )
            # Fail loudly rather than saving an error body as if it were an image.
            resp.raise_for_status()
            local_name = f"{q['id']}_{os.path.basename(file)}"
            with open(f"diagrams/{local_name}", "wb") as f:
                f.write(resp.content)
            print(f"Downloaded: {local_name}")


# Analytics
Topic Distribution
import json
from collections import Counter
import matplotlib.pyplot as plt
topics = Counter(q["topic"] for q in data["questions"])
# Plot top 15 topics
top_15 = topics.most_common(15)
labels, values = zip(*top_15)
plt.figure(figsize=(10, 6))
plt.barh(range(len(labels)), values)
plt.yticks(range(len(labels)), labels)
plt.xlabel("Question count")
plt.title(f"Topic Distribution — NEET 2024 Physics")
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig("topic_distribution.png")
Difficulty Analysis
from collections import Counter
difficulty = Counter(q.get("difficulty") for q in data["questions"])
for level, count in difficulty.most_common():
print(f"{level or 'unrated'}: {count} questions ({count/len(data['questions'])*100:.0f}%)")
Type Breakdown
type_counts = Counter(q["type"] for q in data["questions"])
total = len(data["questions"])
for qtype, count in type_counts.most_common():
print(f"{qtype}: {count} ({count/total*100:.0f}%)")
Data Verification
Every dataset includes a SHA-256 checksum for integrity verification:
import hashlib
# Compute checksum locally
content = json.dumps(data, separators=(",", ":")).encode("utf-8")
local_hash = hashlib.sha256(content).hexdigest()
# Compare with API-reported checksum
assert local_hash == data["checksum"], "Data integrity check failed!"
print("Checksum verified ✓")
JavaScript / TypeScript Examples
const headers = { Authorization: "Bearer ps_xxxxx" };
const base = "https://api.paperstack.qzz.io";
/**
 * Fetch one file of a paper and return its parsed JSON body.
 *
 * @param {string} exam  Exam path segment (e.g. "neet").
 * @param {string} year  Year path segment.
 * @param {string} shift Shift path segment.
 * @param {string} file  File name within the shift (e.g. "paper.json").
 * @returns {Promise<object>} The decoded JSON response.
 * @throws {Error} When the response status is not in the 2xx range.
 */
async function fetchPaper(exam, year, shift, file) {
  const res = await fetch(`${base}/${exam}/${year}/${shift}/${file}`, { headers });
  // fetch() only rejects on network failure; HTTP errors must be checked explicitly,
  // otherwise an error body would be returned as if it were a paper.
  if (!res.ok) {
    throw new Error(`Request failed: ${res.status} ${res.statusText}`);
  }
  return res.json();
}
// Usage
const paper = await fetchPaper("neet", "2024", "s1", "paper.json");
console.log(`Schema: ${paper.schema}, Questions: ${paper.total}`);
console.log(`Checksum: ${paper.checksum}`);
// Group by subject
const bySubject = {};
for (const q of paper.questions) {
(bySubject[q.subject] ??= []).push(q);
}
console.log(Object.keys(bySubject));
Response Validation
Check that the response matches the expected structure:
def validate_paper(data):
    """Check that a paper response has the expected top-level structure.

    Verifies that all required fields are present, that the schema is
    ``"v4"`` and that the number of questions equals ``data["total"]``.
    Prints a confirmation line when every check passes.

    Args:
        data: Decoded paper response (a dict).

    Raises:
        AssertionError: If a field is missing, the schema is unexpected, or
            the question count does not match ``total``.
    """
    required = ["schema", "exam", "year", "shift", "subjects", "total", "questions"]
    # Explicit raises instead of ``assert`` statements: asserts are removed
    # under ``python -O``, which would make this validator accept anything.
    for field in required:
        if field not in data:
            raise AssertionError(f"Missing field: {field}")
    if data["schema"] != "v4":
        raise AssertionError(f"Unexpected schema: {data['schema']}")
    if len(data["questions"]) != data["total"]:
        raise AssertionError("Question count mismatch")
    print("Response valid ✓")


# Rate Limiting Best Practices
import time
def fetch_with_retry(url, headers, max_retries=3):
    """GET ``url`` and return its decoded JSON body, retrying on HTTP 429.

    Makes at most ``max_retries`` requests. After a 429 it waits with
    exponential backoff (1s, 2s, 4s, ... capped at 30s), or longer when the
    response carries a larger integer ``Retry-After`` value, then retries.

    Args:
        url: Full URL to request.
        headers: HTTP headers sent with every attempt.
        max_retries: Maximum number of requests to make.

    Returns:
        The decoded JSON body of the first 200 response.

    Raises:
        requests.HTTPError: For a 4xx/5xx status other than 429.
        Exception: When no attempt produced a 200 response.
    """
    for attempt in range(max_retries):
        resp = requests.get(url, headers=headers)
        if resp.status_code == 200:
            return resp.json()
        if resp.status_code == 429:
            if attempt == max_retries - 1:
                # No further request follows, so sleeping would only delay the failure.
                break
            wait = min(2 ** attempt, 30)
            # Honor the server's hint when it is given as whole seconds.
            # NOTE(review): whether this API sends Retry-After is unconfirmed;
            # without it the backoff is unchanged.
            retry_after = resp.headers.get("Retry-After", "")
            if retry_after.isdigit():
                wait = max(wait, int(retry_after))
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
            continue
        resp.raise_for_status()
    raise Exception("Max retries exceeded")