Spaces:

SpatialWebAgent
/

SpatialParse

Runtime error

Shunfeng Zheng

Upload 89 files

17e77ea verified 8 months ago

16.7 kB

	import requests
	import urllib3
	import json
	from utils import geoutil
	import regex_spatial
	from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping
	import re
	import geopandas as gpd
	from geocoder import geo_level1
	from openai import OpenAI
	import numpy as np

	# SECURITY: never commit an API key to source control. The key that used to be
	# hard-coded here has been published with this file and must be revoked.
	# When no ``api_key`` argument is given, the OpenAI client reads the key from
	# the ``OPENAI_API_KEY`` environment variable and raises if it is not set.
	client = OpenAI()


	# Chat-completion model requested by llmapi().
	model = "gpt-4o"

	# Spellings accepted for each compass direction (lower case, abbreviation
	# with a trailing apostrophe, capitalised and upper case variants).
	# NOTE(review): the functions visible in this file consult ``geo_level1``
	# rather than these lists - confirm whether they are still needed.
	north = [
	    "north",
	    "N'",
	    "North",
	    "NORTH",
	]
	south = [
	    "south",
	    "S'",
	    "South",
	    "SOUTH",
	]
	east = [
	    "east",
	    "E'",
	    "East",
	    "EAST",
	]
	west = [
	    "west",
	    "W'",
	    "West",
	    "WEST",
	]
	northeast = [
	    "north-east",
	    "NE'",
	    "north east",
	    "NORTH-EAST",
	    "North East",
	    "NORTH EAST",
	]
	southeast = [
	    "south-east",
	    "SE'",
	    "south east",
	    "SOUTH-EAST",
	    "South East",
	    "SOUTH EAST",
	]
	northwest = [
	    "north-west",
	    "NW'",
	    "north west",
	    "NORTH-WEST",
	    "North West",
	    "NORTH WEST",
	]
	southwest = [
	    "south-west",
	    "SW'",
	    "south west",
	    "SOUTH-WEST",
	    "South West",
	    "SOUTH WEST",
	]
	# Words that refer to the middle of a place.
	center = [
	    "center",
	    "central",
	    "downtown",
	    "midtown",
	]

	# JSON dataset loaded by the batch script at the bottom of this file.
	dataset_path = 'dataset/dataset_20.json'

	def get_geojson(ent, arr, centroid):
	    """Wrap one ring of coordinates in a single-feature GeoJSON collection.

	    :param ent: value stored as the feature's ``id``
	    :param arr: ring of coordinates; it becomes the only ring of the Polygon
	        geometry (the same object is stored, it is not copied)
	    :param centroid: value stored under ``properties['centroid']``
	    :return: dict of type ``FeatureCollection`` containing exactly one feature
	    """
	    feature = {
	        'type': 'Feature',
	        'id': ent,
	        'properties': {'centroid': centroid},
	        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
	    }
	    return {'type': 'FeatureCollection', 'features': [feature]}


	def get_coordinates(ent):
	    """Geocode ``ent`` with Nominatim and return its outline annotated with bearings.

	    NOTE: a second ``get_coordinates`` is defined further down in this file;
	    because it rebinds the same name, that later definition is the one other
	    code in this module ends up calling.

	    :param ent: place name to search for
	    :return: ``(outline, centroid)`` where ``outline`` is the first element
	        of the top hit's GeoJSON ``coordinates`` and every vertex in it has
	        had its bearing from ``centroid`` (as computed by
	        ``geoutil.calculate_bearing``) appended as an extra trailing item,
	        and ``centroid`` is the hit's ``(lon, lat)`` as floats
	    :raises IndexError: if Nominatim returns no result for ``ent``
	    :raises requests.RequestException: on network errors, timeouts or a
	        non-success HTTP status
	    """
	    page = requests.get(
	        'https://nominatim.openstreetmap.org/search.php',
	        # Let requests build the query string so that spaces, '&', '#' and
	        # non-ASCII characters in the place name are URL-encoded correctly.
	        params={
	            'q': ent,
	            'polygon_geojson': 1,
	            'accept-language': 'en',
	            'format': 'jsonv2',
	        },
	        headers={
	            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
	        },
	        # NOTE(review): TLS certificate verification is disabled here, as it
	        # was before this change; re-enable it unless there is a known reason.
	        verify=False,
	        # Without a timeout a stalled connection would block forever.
	        timeout=30,
	    )
	    page.raise_for_status()
	    json_content = json.loads(page.content)
	    if not json_content:
	        # Same exception type as the former bare ``json_content[0]`` lookup,
	        # but with a message that names the place that failed.
	        raise IndexError(f'Nominatim returned no result for {ent!r}')

	    top_hit = json_content[0]
	    all_coordinates = top_hit['geojson']['coordinates'][0]
	    centroid = (float(top_hit['lon']), float(top_hit['lat']))
	    for p in all_coordinates:
	        # Vertices are mutated in place: [lon, lat] becomes [lon, lat, bearing].
	        p.append(geoutil.calculate_bearing(centroid, (p[0], p[1])))

	    return all_coordinates, centroid


	# level3
	def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
	    """Select the points whose bearing from ``centroid`` lies in a sector.

	    The bearing of every point is obtained from ``geoutil.calculate_bearing``.
	    For a ``direction`` listed in ``geo_level1.east`` a point is kept when its
	    bearing is ``>= minimum`` OR ``<= maximum``; for any other direction it is
	    kept when the bearing is between ``minimum`` and ``maximum`` inclusive.
	    NOTE(review): the OR form presumably handles a sector that wraps around
	    the ends of the bearing range - confirm against ``geo_level1.get_min_max``.

	    :param coordinates: iterable of points indexable as ``p[0]``, ``p[1]``
	    :param centroid: reference point passed to ``calculate_bearing``
	    :param direction: direction label compared against ``geo_level1.east``
	    :param minimum: lower bearing bound
	    :param maximum: upper bearing bound
	    :return: list of ``(p[0], p[1], bearing)`` tuples for the kept points,
	        in input order
	    """
	    use_wrapped_test = direction in geo_level1.east
	    selected = []
	    for point in coordinates:
	        bearing = geoutil.calculate_bearing(centroid, point)
	        if use_wrapped_test:
	            keep = bearing >= minimum or bearing <= maximum
	        else:
	            keep = minimum <= bearing <= maximum
	        if keep:
	            selected.append((point[0], point[1], bearing))
	    return selected
	def get_level3(level3):
	    """Split a distance expression such as ``"4 km"`` into amount and unit.

	    The amount may contain a decimal fraction (``"2.5 km"`` gives ``"2.5"``);
	    previously only the digits before the decimal point were returned, which
	    silently shortened the distance.

	    :param level3: distance text containing a number and a unit word
	    :return: ``(digits, unit)`` - both strings: the first number found and
	        the first run of ASCII letters found
	    :raises IndexError: if the text contains no number or no letters
	    """
	    digits = re.findall(r'[0-9]+(?:\.[0-9]+)?', level3)[0]
	    unit = re.findall(r'[A-Za-z]+', level3)[0]
	    return digits, unit

	def get_direction_coordinates(coordinates, centroid, level1):
	    """Restrict ``coordinates`` to the angular sector that belongs to ``level1``.

	    The sector bounds come from ``geo_level1.get_min_max(level1)``. When that
	    returns ``None`` the input is handed back untouched; otherwise the points
	    are filtered by ``get_directional_coordinates_by_angle``.

	    :param coordinates: points to filter
	    :param centroid: reference point for the bearing computation
	    :param level1: direction label
	    :return: the filtered list, or ``coordinates`` itself when no bounds exist
	    """
	    bounds = geo_level1.get_min_max(level1)
	    if bounds is None:
	        return coordinates
	    return get_directional_coordinates_by_angle(
	        coordinates, centroid, level1, bounds[0], bounds[1]
	    )
	def sort_west(poly1, poly2, centroid):
	    """Collect the vertices of two shapes whose bearing is within 157..202.

	    Despite the name nothing is sorted: the rings of each shape are read from
	    its geo-interface mapping, each ring is walked in reverse order, and a
	    vertex is kept when ``geoutil.calculate_bearing(centroid, vertex)`` lies
	    between 157 and 202 inclusive. Vertices of ``poly1`` come first, followed
	    by those of ``poly2``.

	    :param poly1: first shape; ``mapping(poly1)`` must be a feature collection
	    :param poly2: second shape, same requirement
	    :param centroid: reference point for the bearing computation
	    :return: list of ``(vertex[0], vertex[1], bearing)`` tuples
	    """
	    rings_first = mapping(poly1)["features"][0]["geometry"]["coordinates"]
	    rings_second = mapping(poly2)["features"][0]["geometry"]["coordinates"]

	    # Flatten each shape into one vertex list, every ring reversed.
	    flattened = []
	    for rings in (rings_first, rings_second):
	        vertices = []
	        for ring in rings:
	            vertices.extend(list(ring[::-1]))
	        flattened.append(vertices)

	    kept = []
	    for vertices in flattened:
	        for vertex in vertices:
	            bearing = geoutil.calculate_bearing(centroid, vertex)
	            if 157 <= bearing <= 202:
	                kept.append((vertex[0], vertex[1], bearing))
	    return kept


	def get_level3_coordinates(coordinates, level_3, level1):
	    """Build the band of points lying roughly ``level_3`` away from a region.

	    The region outline is buffered twice (by ``0.0095 * kms`` and by
	    ``0.013 * kms`` in coordinate units) and the smaller buffer is subtracted
	    from the larger one; the vertices of the resulting band are collected
	    with every ring reversed. When ``level1`` is given, the vertices are
	    narrowed to that direction, and for directions listed in
	    ``geo_level1.west`` the result is replaced by ``sort_west`` applied to the
	    two buffers.

	    :param coordinates: pair ``(outline, centroid)`` of the reference region
	    :param level_3: distance text such as ``"4 km"``
	    :param level1: direction label, or ``None`` for no directional filter
	    :return: ``(points, center)`` where ``center`` is the centroid of the
	        selected points, or the region's own centroid if no point was selected
	    """
	    amount, unit_name = get_level3(level_3)
	    kms = geoutil.get_kilometers(amount, unit_name)

	    outline, region_centroid = coordinates[0], coordinates[1]
	    region = gpd.GeoSeries(Polygon(outline))
	    inner_buffer = region.buffer(0.0095 * kms, join_style=2)
	    outer_buffer = region.buffer(0.013 * kms, join_style=2)
	    band = outer_buffer.difference(inner_buffer)

	    coord = []
	    for ring in mapping(band)["features"][0]["geometry"]["coordinates"]:
	        coord.extend(list(ring[::-1]))

	    if level1 is not None:
	        coord = get_direction_coordinates(coord, region_centroid, level1)
	        if level1 in geo_level1.west:
	            coord = sort_west(outer_buffer, inner_buffer, region_centroid)

	    # Geometric centre (centroid) of the selected points.
	    if not coord:
	        # Fallback: the original centre point of the region.
	        return coord, region_centroid
	    center_point = MultiPoint(coord).centroid
	    return coord, (center_point.x, center_point.y)

	# between
	def get_between_coordinates(coordinates1, coordinates2):
	    """Place a circle halfway between two regions, sized from their mean area.

	    The circle is centred on the midpoint of the two region centres. Its
	    radius is that of a disc whose area equals the mean of the two polygon
	    areas (as reported by shapely, i.e. in squared coordinate units),
	    multiplied by 111.32 to express it in kilometres.

	    :param coordinates1: ``(outline, centre)`` of the first region
	    :param coordinates2: ``(outline, centre)`` of the second region
	    :return: ``(circle_points, midpoint)`` - 100 ``(lon, lat)`` points on the
	        circle (0 and 360 degrees are both included) and the circle centre
	    """
	    # Mean area of the two polygons (approximation; no projection is applied).
	    mean_area = (Polygon(coordinates1[0]).area + Polygon(coordinates2[0]).area) / 2

	    # Radius of the equal-area circle, converted to km.
	    r_km = np.sqrt(mean_area / np.pi) * 111.32

	    # Circle centre: midpoint of the two region centres.
	    mid_x = (coordinates1[1][0] + coordinates2[1][0]) / 2
	    mid_y = (coordinates1[1][1] + coordinates2[1][1]) / 2
	    midpoint = (mid_x, mid_y)

	    # Approximate length of one degree: latitude is about 111.32 km, longitude
	    # shrinks with the cosine of the latitude.
	    km_per_lat_degree = 111.32
	    km_per_lon_degree = 111.32 * np.cos(np.radians(midpoint[1]))

	    def _point_at(theta_degrees):
	        """Return the circle point at the given angle."""
	        theta_rad = np.radians(theta_degrees)
	        d_lat = (np.sin(theta_rad) * r_km) / km_per_lat_degree
	        d_lon = (np.cos(theta_rad) * r_km) / km_per_lon_degree
	        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

	    circle_points = [_point_at(theta) for theta in np.linspace(0, 360, num=100)]
	    return circle_points, midpoint


	def llmapi(text):
	    """Ask the chat model (Chinese prompt) to turn ``text`` into positioning steps.

	    NOTE: ``llmapi`` is defined a second time directly below this function;
	    that later definition rebinds the name, so this version is not the one
	    that gets called.

	    :param text: natural-language description of a location
	    :return: the JSON value found between the ``<<<JSON>>>`` and ``<<<END>>>``
	        markers of the reply (the prompt asks for a list of step dicts)
	    :raises ValueError: if the reply has no marker-delimited block, or the
	        block is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``)
	    """
	    system_prompt = (
	    "你是一个资深的地理学家，你的任务是通过给定的一段自然语言，来选择正确的定位函数顺序以及他们的输入。\n"
	    "你能选择的定位函数有：\n"
	    "1. 相对定位（Relative Positioning）：输入为地点坐标，方位，距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
	    "2. 中间定位（Between Positioning）：输入为两个地点的坐标，输出为两个地点坐标的中点。\n"
	    "请先进行思维链（CoT）推理，并最终用 JSON 格式输出你的答案，用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
	    "请确保所有输入仅包含：地点名称（字符串）、索引（整数）、方位（字符串，必须是英文）或距离（字符串，带单位），不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
	    "每个步骤编号都有 id 记录，然后如果某个输入是之前步骤的输出，那么输入对应步骤的 id。\n"
	    "所有方向必须使用英文（如 south, west, northeast, etc.）。\n"
	    "示例输出：\n"
	    "<<<JSON>>>\n"
	    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
	    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
	    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
	    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
	    "<<<END>>>")

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    # The message content can be None; treat that like an empty reply so the
	    # failure below is the documented ValueError rather than a TypeError.
	    result = chat_completion.choices[0].message.content or ""

	    # A newline is still required after the opening marker (so an inline
	    # mention of the markers is not mistaken for the payload), but trailing
	    # spaces, CRLF line endings and a missing or padded line break before the
	    # closing marker are now tolerated.
	    json_match = re.search(r'<<<JSON>>>\s*\n(.*?)\s*<<<END>>>', result, re.DOTALL)
	    if json_match is None:
	        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
	    return json.loads(json_match.group(1))
	def llmapi(text):
	    """Ask the chat model to turn ``text`` into an ordered list of positioning steps.

	    The system prompt describes the two available functions ("Relative" and
	    "Between") and asks the model to reason first and then emit a JSON list
	    wrapped in ``<<<JSON>>>`` / ``<<<END>>>`` markers. The raw reply is
	    printed, then the wrapped JSON is parsed and returned.

	    :param text: natural-language description of a location
	    :return: the JSON value found between the markers (the prompt asks for a
	        list of dicts with the keys ``id``, ``function`` and ``inputs``)
	    :raises ValueError: if the reply has no marker-delimited block, or the
	        block is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``)
	    """
	    system_prompt = (
	    "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
	    "The positioning functions you can choose from are:\n"
	    "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
	    "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
	    # The trailing "\n" was missing here, which glued this sentence onto the
	    # next one ("...complex situations.First, perform...").
	    "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations.\n"
	    "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
	    "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
	    "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
	    "All directions must be in English (e.g., south, west, northeast, etc.).\n"
	    "Example output:\n"
	    "<<<JSON>>>\n"
	    "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
	    "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
	    "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
	    "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
	    "<<<END>>>")

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": text},
	    ]

	    chat_completion = client.chat.completions.create(
	        messages=messages,
	        model=model,
	    )

	    # The message content can be None; treat that like an empty reply so the
	    # failure below is the documented ValueError rather than a TypeError.
	    result = chat_completion.choices[0].message.content or ""
	    print(result)

	    # A newline is still required after the opening marker (so an inline
	    # mention of the markers in the reasoning is not mistaken for the
	    # payload), but trailing spaces, CRLF line endings and a missing or padded
	    # line break before the closing marker are now tolerated.
	    json_match = re.search(r'<<<JSON>>>\s*\n(.*?)\s*<<<END>>>', result, re.DOTALL)
	    if json_match is None:
	        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
	    return json.loads(json_match.group(1))


	def get_coordinates(location):
	    """Geocode ``location`` with Nominatim and return ``(coordinates, centroid)``.

	    Only the first search hit is used.

	    :param location: place name to search for
	    :return: tuple ``(coordinates, centroid)``; ``coordinates`` is the first
	        element of the hit's GeoJSON ``coordinates`` array and ``centroid``
	        is the hit's ``(lon, lat)`` as floats
	    :raises IndexError: if Nominatim returns no result for ``location``
	    :raises requests.RequestException: on network errors, timeouts or a
	        non-success HTTP status
	    """
	    response = requests.get(
	        'https://nominatim.openstreetmap.org/search.php',
	        # Let requests build the query string so that spaces, '&', '#' and
	        # non-ASCII characters in the place name are URL-encoded correctly.
	        params={
	            'q': location,
	            'polygon_geojson': 1,
	            'accept-language': 'en',
	            'format': 'jsonv2',
	        },
	        headers={"User-Agent": "Mozilla/5.0"},
	        # NOTE(review): TLS certificate verification is disabled here, as it
	        # was before this change; re-enable it unless there is a known reason.
	        verify=False,
	        # Without a timeout a stalled connection would block the whole run.
	        timeout=30,
	    )
	    response.raise_for_status()
	    json_content = json.loads(response.content)
	    if not json_content:
	        # Same exception type as the former bare ``json_content[0]`` lookup,
	        # but with a message that names the place that failed.
	        raise IndexError(f'Nominatim returned no result for {location!r}')

	    top_hit = json_content[0]
	    # NOTE(review): for a GeoJSON Polygon this is the outer ring, which is
	    # what the callers feed to shapely's Polygon. Other geometry types
	    # (e.g. Point, MultiPolygon) yield a different shape here - confirm
	    # whether they need dedicated handling.
	    coordinates = top_hit['geojson']['coordinates'][0]
	    centroid = (float(top_hit['lon']), float(top_hit['lat']))
	    return (coordinates, centroid)


	def execute_steps(steps):
	"""Run a parsed plan of positioning steps and collect the result of each one.

	Every step is a mapping with the keys 'id', 'function' and 'inputs'.
	Integer inputs are treated as references to earlier steps and are replaced
	by the result stored under that id; all other inputs are passed through.

	* "Relative" unpacks its inputs as (location, direction, distance) and
	calls get_level3_coordinates(location, distance, direction).
	* "Between" unpacks its inputs as two locations and calls
	get_between_coordinates(location1, location2).

	A location that is still a string is geocoded with get_coordinates first.
	A step whose 'function' is neither of the two values matches no branch
	below, so no result is stored for it. Intermediate values are printed.

	NOTE(review): the indentation of this function was lost in this copy of
	the file; the nesting has to be restored before the code can run.

	:param steps: iterable of step mappings (e.g. the list returned by llmapi)
	:return: dict mapping step id to the result computed for that step
	:raises KeyError: if an integer input names an id with no stored result
	"""
	# Results of the steps executed so far, keyed by step id.
	data = {}

	for step in steps:
	step_id = step['id']
	function = step['function']
	inputs = step['inputs']
	# print('-' * 50)
	# print(function)
	# print(inputs)


	# Replace references to earlier steps by the results stored for them.
	resolved_inputs = []
	for inp in inputs:
	if isinstance(inp, int):
	resolved_inputs.append(data[inp])
	else:
	resolved_inputs.append(inp)
	if function == "Relative":
	location, direction, distance = resolved_inputs
	# A place name has to be geocoded; a referenced result is used as is.
	if isinstance(location, str):
	location = get_coordinates(location)
	# Note the argument order: the distance text comes before the direction.
	result = get_level3_coordinates(location, distance, direction)
	print(location, distance, direction, 'rrr')
	print(result)
	data[step_id] = result

	elif function == "Between":
	location1, location2 = resolved_inputs
	if isinstance(location1, str):
	location1 = get_coordinates(location1)
	# Rebuilds the (outline, centroid) pair as a list with the same elements.
	location1 = [location1[0]] + list(location1[1:])
	# location1 = [location1[0][0]] + list(location1[1:])


	# location1[0] = location1[0]
	if isinstance(location2, str):
	location2 = get_coordinates(location2)
	print(location2)
	# Rebuilds the (outline, centroid) pair as a list with the same elements.
	location2 = [location2[0]] + list(location2[1:])
	# location2 = [location2[0][0]] + list(location2[1:])


	print(location1)
	result = get_between_coordinates(location1, location2)
	print(location1, location2, 'bbb')
	print(result)
	data[step_id] = result

	return data

	# a = get_coordinates('Burwood')
	# a2 = get_coordinates('Glebe')
	# b = get_level3_coordinates(a, '5 km', 'east')
	# c = get_between_coordinates(a, a2)

	# Full pipeline
	# Default input
	# NOTE: default_input_text is assigned several times in a row, so only the
	# last assignment takes effect; the earlier strings are kept as sample
	# inputs. Nothing in the visible part of this file reads the variable.
	default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
	default_input_text = "你是一位规划师，正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园（Mount Rainier National Park）和北喀斯喀特国家公园（North Cascades National Park）。首先，你想在这两个国家公园之间找到一个中间点。接着，你希望在这个中间点与北喀斯喀特国家公园之间，再取一个中间位置，以便确定最终的建设候选地。"
	default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
	default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
	# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."





	# Load the instructions to be parsed.
	with open(dataset_path, 'r', encoding='utf-8') as f:
	    data = json.load(f)


	answer_path = 'answer/GPT4o.json'

	# Start from the answers saved by earlier runs, if any. A missing, empty or
	# unreadable file means "no answers yet"; previously a missing file crashed
	# the run after the first model call, and a bare ``except`` hid every other
	# kind of error as well.
	try:
	    with open(answer_path, "r", encoding="utf-8") as f:
	        datapoint = json.load(f)
	except (FileNotFoundError, ValueError):
	    # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
	    datapoint = []

	answer = []  # not used below; kept because it is a module-level name
	for i in data:
	    parsed_steps = llmapi(i['instruction'])
	    # parsed_steps = [{"id": 1, "function": "Between", "inputs": ["Chatswood", "North Sydney"]},{"id": 2, "function": "Between", "inputs": [1, "North Sydney"]}]
	    i["steps"] = parsed_steps

	    # Save after every item so that a failure part-way through the dataset
	    # does not lose the answers obtained so far.
	    datapoint.append(i)
	    with open(answer_path, "w", encoding="utf-8") as f:
	        json.dump(datapoint, f, ensure_ascii=False, indent=2)



	# Format conversion
	def write_custom_json(data, filename):
	    """Write ``data`` as JSON with every step of an item on a single line.

	    Each item is emitted as an object with the keys ``index``,
	    ``instruction`` and ``steps``. Every value is serialised with
	    ``json.dumps`` so that quotes, backslashes and control characters are
	    escaped; the ``function`` value used to be pasted in verbatim, which
	    produced invalid JSON whenever it contained such a character.

	    :param data: sequence of dicts with the keys ``index``, ``instruction``
	        and ``steps``; each step has ``id``, ``function`` and ``inputs``
	    :param filename: path of the file to (over)write, encoded as UTF-8
	    :return: None
	    """
	    def format_step(step):
	        """Return one step as a single-line JSON object."""
	        step_id = json.dumps(step["id"], ensure_ascii=False)
	        function = json.dumps(step["function"], ensure_ascii=False)
	        inputs = json.dumps(step["inputs"], ensure_ascii=False)
	        return f'{{"id": {step_id}, "function": {function}, "inputs": {inputs}}}'

	    last = len(data) - 1
	    with open(filename, "w", encoding="utf-8") as f:
	        f.write("[\n")
	        for i, item in enumerate(data):
	            index = json.dumps(item["index"], ensure_ascii=False)
	            instruction = json.dumps(item["instruction"], ensure_ascii=False)
	            step_lines = [f" {format_step(step)}" for step in item["steps"]]
	            f.write(" {\n")
	            f.write(f' "index": {index},\n')
	            f.write(f' "instruction": {instruction},\n')
	            f.write(' "steps": [\n')
	            f.write(",\n".join(step_lines))
	            f.write("\n ]\n")
	            # No comma after the final item, so the array stays valid JSON.
	            f.write(" }" + (",\n" if i < last else "\n"))
	        f.write("]\n")

	# Final pass: read back everything saved to the answer file and rewrite it in
	# the compact layout produced by write_custom_json (one step per line).
	with open(answer_path, "r", encoding="utf-8") as f:
	    data = json.loads(f.read())

	write_custom_json(data, answer_path)