switched to json
Browse files
comb.json
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "novatra_overview",
|
| 4 |
+
"type": "event_overview",
|
| 5 |
+
"title": "NovaTra 1.0 Event Overview",
|
| 6 |
+
"text": "NovaTra 1.0 is a hybrid business innovation and entrepreneurship competition organized by MLSC VCET (Microsoft Learn Student Chapter, Vidyavardhini's College of Engineering and Technology). The event is open to all undergraduate students and requires teams of 2-3 members with no solo participation allowed. The competition provides a platform for students to pitch entrepreneurial and business ideas while evaluating creativity, feasibility, problem-solving ability, and communication skills through real-world pitching scenarios including auctions, networking, and situational adaptability.",
|
| 7 |
+
"metadata": {
|
| 8 |
+
"event_name": "NovaTra 1.0",
|
| 9 |
+
"organizer": "MLSC VCET",
|
| 10 |
+
"theme": "Business Innovation & Entrepreneurship",
|
| 11 |
+
"event_type": "Hybrid",
|
| 12 |
+
"audience": "Undergraduate students",
|
| 13 |
+
"team_size": "2-3 members",
|
| 14 |
+
"date": "2025-09-17",
|
| 15 |
+
"category": "competition_overview"
|
| 16 |
+
}
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"id": "novatra_registration",
|
| 20 |
+
"type": "registration_process",
|
| 21 |
+
"title": "NovaTra 1.0 Registration and Selection Process",
|
| 22 |
+
"text": "Registration for NovaTra 1.0 runs from September 9-13, 2025, requiring teams to submit a registration form with team details and a mandatory 1-minute idea video in MP4 format or YouTube/Drive link. The top 30 teams will be shortlisted on September 13-14, 2025, based on clarity, uniqueness, feasibility, and presentation quality. Shortlisted teams must participate in a reel round on September 15, 2025, creating a 30-60 second Instagram reel showcasing their product/idea while tagging MLSC VCET official Instagram and using hashtags #mlsc #novatra1.0. The registration fee is ₹149 per shortlisted team.",
|
| 23 |
+
"metadata": {
|
| 24 |
+
"registration_period": "2025-09-09 to 2025-09-13",
|
| 25 |
+
"shortlisting_date": "2025-09-13 to 2025-09-14",
|
| 26 |
+
"reel_round_date": "2025-09-15",
|
| 27 |
+
"main_event_date": "2025-09-17",
|
| 28 |
+
"fee": "₹149",
|
| 29 |
+
"shortlisted_teams": 30,
|
| 30 |
+
"video_duration": "60 seconds",
|
| 31 |
+
"reel_duration": "30-60 seconds",
|
| 32 |
+
"category": "registration_details"
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"id": "novatra_schedule",
|
| 37 |
+
"type": "event_schedule",
|
| 38 |
+
"title": "NovaTra 1.0 Event Day Schedule (September 17, 2025)",
|
| 39 |
+
"text": "The offline event on September 17, 2025, follows a structured schedule: 8:30 AM arrival and ID distribution, 9:00-9:45 AM opening session and inauguration, 9:45-11:30 AM Round 1 Auction Round, 11:45 AM-12:45 PM Round 2 One-Minute Pitch with Jury Q&A, 1:00-1:30 PM Mini Game 1 Networking Game, 1:45 PM elimination announcement (top 10 teams), 2:00-2:15 PM Round 3 briefing and chit distribution, 2:15-3:00 PM lunch break and PPT preparation, 3:00-5:00 PM Round 3 Final Presentations with Jury Q&A, 5:00-5:45 PM Mini Game 2 Quiz/Guest Talk, 5:45-6:15 PM results and certificate distribution, and 6:15-6:30 PM closing ceremony.",
|
| 40 |
+
"metadata": {
|
| 41 |
+
"date": "2025-09-17",
|
| 42 |
+
"start_time": "08:30",
|
| 43 |
+
"end_time": "18:30",
|
| 44 |
+
"total_duration": "10 hours",
|
| 45 |
+
"rounds": 3,
|
| 46 |
+
"mini_games": 2,
|
| 47 |
+
"elimination_point": "After Round 2",
|
| 48 |
+
"finalists": 10,
|
| 49 |
+
"category": "event_schedule"
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"id": "novatra_round1",
|
| 54 |
+
"type": "competition_round",
|
| 55 |
+
"title": "NovaTra 1.0 Round 1: Auction Round",
|
| 56 |
+
"text": "Round 1 is an Auction Round designed to simulate bidding for competitive advantages and test decision-making and resource management skills. Each team receives virtual ₹10,000 to bid on advantages announced by the auctioneer. Available advantages include +30 seconds in Round 2, skip jury questions, choose presentation order, bonus slide, second attempt, and wildcard tokens. The highest bidder wins each advantage with credits deducted from their virtual budget. Teams cannot overspend, and tie-breakers are resolved with quick one-line pitches. This round has no elimination.",
|
| 57 |
+
"metadata": {
|
| 58 |
+
"round_number": 1,
|
| 59 |
+
"round_name": "Auction Round",
|
| 60 |
+
"virtual_budget": "₹10,000",
|
| 61 |
+
"purpose": "Decision-making & resource management",
|
| 62 |
+
"elimination": false,
|
| 63 |
+
"advantages": ["Extra time", "Skip questions", "Choose order", "Bonus slide", "Second attempt", "Wildcard"],
|
| 64 |
+
"time_slot": "09:45-11:30",
|
| 65 |
+
"category": "competition_round"
|
| 66 |
+
}
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"id": "novatra_round2",
|
| 70 |
+
"type": "competition_round",
|
| 71 |
+
"title": "NovaTra 1.0 Round 2: One-Minute Pitch",
|
| 72 |
+
"text": "Round 2 requires a strictly timed 1-minute elevator pitch with no props, focusing on concise communication and confidence. The recommended pitch structure includes: hook (5-10 seconds), problem statement (10-15 seconds), solution (20-25 seconds), impact and market fit (10-15 seconds), and closing line (5 seconds). Teams can use advantages purchased in Round 1. After the pitch, judges ask 1-2 questions. Evaluation criteria worth 25% of total score include clarity, uniqueness, feasibility, and Q&A handling. Penalties apply for exceeding the time limit.",
|
| 73 |
+
"metadata": {
|
| 74 |
+
"round_number": 2,
|
| 75 |
+
"round_name": "One-Minute Pitch",
|
| 76 |
+
"duration": "60 seconds",
|
| 77 |
+
"evaluation_weight": "25%",
|
| 78 |
+
"props_allowed": false,
|
| 79 |
+
"pitch_components": ["Hook", "Problem", "Solution", "Impact", "Closing"],
|
| 80 |
+
"qa_included": true,
|
| 81 |
+
"time_slot": "11:45-12:45",
|
| 82 |
+
"category": "competition_round"
|
| 83 |
+
}
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"id": "novatra_networking",
|
| 87 |
+
"type": "mini_game",
|
| 88 |
+
"title": "NovaTra 1.0 Mini Game 1: Networking Game",
|
| 89 |
+
"text": "The Networking Game encourages peer interaction and tests marketing skills in a poster fair format. Each team creates a poster with tagline and receives 5 star tokens for peer voting. Teams pitch their poster in 15-30 seconds to other participants who award stars based on appeal. The team receiving the most stars wins the Networking Winner title and earns 5 bonus points. Judges are not involved in this peer-based evaluation activity.",
|
| 90 |
+
"metadata": {
|
| 91 |
+
"game_number": 1,
|
| 92 |
+
"game_name": "Networking Game",
|
| 93 |
+
"format": "Poster fair",
|
| 94 |
+
"pitch_duration": "15-30 seconds",
|
| 95 |
+
"voting_system": "Peer-based stars",
|
| 96 |
+
"star_tokens": 5,
|
| 97 |
+
"bonus_points": 5,
|
| 98 |
+
"judge_involvement": false,
|
| 99 |
+
"time_slot": "13:00-13:30",
|
| 100 |
+
"category": "mini_game"
|
| 101 |
+
}
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"id": "novatra_round3",
|
| 105 |
+
"type": "competition_round",
|
| 106 |
+
"title": "NovaTra 1.0 Round 3: Final Presentation",
|
| 107 |
+
"text": "Round 3 is the Final Presentation for the top 10 teams, testing depth, adaptability, and market readiness through a 5-7 minute PowerPoint presentation combined with a chit-based situational challenge. Example challenges include scenarios like customer payment refusal, immediate competitor launch, or market pivot requirements. The presentation structure should cover problem, solution, target market, business model, feasibility, uniqueness, chit response, and conclusion. This round carries 65% of the total evaluation weight, focusing on problem clarity, feasibility, market understanding, uniqueness, and Q&A handling. Teams face penalties for exceeding the 7-minute time limit.",
|
| 108 |
+
"metadata": {
|
| 109 |
+
"round_number": 3,
|
| 110 |
+
"round_name": "Final Presentation",
|
| 111 |
+
"participants": "Top 10 teams",
|
| 112 |
+
"duration": "5-7 minutes",
|
| 113 |
+
"evaluation_weight": "65%",
|
| 114 |
+
"presentation_format": "PowerPoint",
|
| 115 |
+
"situational_challenge": true,
|
| 116 |
+
"structure_components": ["Problem", "Solution", "Market", "Business model", "Feasibility", "Uniqueness", "Challenge response"],
|
| 117 |
+
"time_slot": "15:00-17:00",
|
| 118 |
+
"category": "competition_round"
|
| 119 |
+
}
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"id": "novatra_guidelines",
|
| 123 |
+
"type": "participant_guidelines",
|
| 124 |
+
"title": "NovaTra 1.0 Participant Guidelines and Rules",
|
| 125 |
+
"text": "NovaTra 1.0 participants must form teams of 2-3 members from the same college with no solo or cross-college participation allowed. The competition is open to all undergraduate students with a registration fee of ₹149 per shortlisted team. All participants must follow MLSC VCET's official Instagram account as all announcements will be posted there. Participants are expected to maintain discipline throughout the event, avoid plagiarism in their submissions, and respect judges' decisions which are final. Teams must use mandatory hashtags #mlsc #novatra1.0 for social media content related to the competition.",
|
| 126 |
+
"metadata": {
|
| 127 |
+
"team_composition": "2-3 members, same college",
|
| 128 |
+
"eligibility": "Undergraduate students",
|
| 129 |
+
"fee": "₹149 per shortlisted team",
|
| 130 |
+
"social_media_requirement": "Follow @mlsc_vcet",
|
| 131 |
+
"mandatory_hashtags": ["#mlsc", "#novatra1.0"],
|
| 132 |
+
"conduct_rules": ["Maintain discipline", "Avoid plagiarism", "Respect judges"],
|
| 133 |
+
"category": "guidelines_rules"
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
]
|
main.py
CHANGED
|
@@ -6,7 +6,7 @@ from langchain_core.messages import HumanMessage, SystemMessage
|
|
| 6 |
from langchain_core.vectorstores import InMemoryVectorStore
|
| 7 |
from langchain_core.documents import Document
|
| 8 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
-
|
| 10 |
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
| 11 |
from langchain import hub
|
| 12 |
from langgraph.graph import START, StateGraph
|
|
@@ -64,15 +64,56 @@ embeddings = CohereEmbeddings(
|
|
| 64 |
|
| 65 |
vector_store = InMemoryVectorStore(embedding=embeddings)
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
|
| 70 |
-
# all_splits = text_splitter.split_text(data_1 + "\n\n" + data_2 + "\n\n" + data_3 + "\n\n" + data_4)
|
| 71 |
-
# all_splits = text_splitter.split_text(comb)
|
| 72 |
-
all_splits = text_splitter.split_documents(md_loader.load())
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
_ = vector_store.add_documents(documents=docs)
|
| 77 |
|
| 78 |
|
|
|
|
| 6 |
from langchain_core.vectorstores import InMemoryVectorStore
|
| 7 |
from langchain_core.documents import Document
|
| 8 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 9 |
+
import json
|
| 10 |
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
| 11 |
from langchain import hub
|
| 12 |
from langgraph.graph import START, StateGraph
|
|
|
|
| 64 |
|
| 65 |
vector_store = InMemoryVectorStore(embedding=embeddings)
|
| 66 |
|
| 67 |
+
# Read comb.json, which sits next to this module, in place of the markdown loader.
json_path = os.path.join(os.path.dirname(__file__), "comb.json")
with open(json_path, mode="r", encoding="utf-8") as json_file:
    data = json.load(json_file)
|
| 71 |
|
| 72 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
def _collect_texts(node):
|
| 75 |
+
"""Recursively collect text strings from a JSON structure.
|
| 76 |
+
|
| 77 |
+
Supports: string, list, dict with common keys like 'text', 'content', 'body',
|
| 78 |
+
or a list of documents. Falls back to joining stringifiable values.
|
| 79 |
+
"""
|
| 80 |
+
texts = []
|
| 81 |
+
if isinstance(node, str):
|
| 82 |
+
texts.append(node)
|
| 83 |
+
elif isinstance(node, list):
|
| 84 |
+
for item in node:
|
| 85 |
+
texts.extend(_collect_texts(item))
|
| 86 |
+
elif isinstance(node, dict):
|
| 87 |
+
# common text keys
|
| 88 |
+
if "text" in node and isinstance(node["text"], str):
|
| 89 |
+
texts.append(node["text"])
|
| 90 |
+
elif "content" in node and isinstance(node["content"], str):
|
| 91 |
+
texts.append(node["content"])
|
| 92 |
+
elif "body" in node and isinstance(node["body"], str):
|
| 93 |
+
texts.append(node["body"])
|
| 94 |
+
elif "documents" in node and isinstance(node["documents"], list):
|
| 95 |
+
texts.extend(_collect_texts(node["documents"]))
|
| 96 |
+
else:
|
| 97 |
+
# fallback: stringify simple values
|
| 98 |
+
joined = " ".join(str(v) for v in node.values() if isinstance(v, (str, int, float)))
|
| 99 |
+
if joined:
|
| 100 |
+
texts.append(joined)
|
| 101 |
+
else:
|
| 102 |
+
texts.append(str(node))
|
| 103 |
+
return texts
|
| 104 |
+
|
| 105 |
+
# Flatten the parsed JSON into plain strings.
raw_texts = _collect_texts(data)

# Chunk every non-empty string with the shared text splitter.
all_splits = []
for raw in raw_texts:
    if raw:
        all_splits.extend(text_splitter.split_text(raw))

# Wrap each chunk in a Document and add the batch to the vector store.
docs = [Document(page_content=chunk) for chunk in all_splits]
_ = vector_store.add_documents(documents=docs)
|
| 118 |
|
| 119 |
|