Spaces:
Runtime error
Runtime error
| import requests | |
| import urllib3 | |
| import json | |
| from utils import geoutil | |
| import regex_spatial | |
| from shapely.geometry import Polygon, MultiPoint, LineString, Point, mapping | |
| import re | |
| import geopandas as gpd | |
| from geocoder import geo_level1 | |
| from openai import OpenAI | |
| import numpy as np | |
# SECURITY: the API key must never be embedded in source code. When no
# api_key argument is given, the OpenAI client reads the key from the
# OPENAI_API_KEY environment variable. Any key that was previously committed
# in this file has to be treated as leaked and revoked.
client = OpenAI()
# Chat model used by llmapi().
model = "gpt-4o"
# Accepted spellings for each cardinal direction.
north = [
    "north", "N'", "North", "NORTH",
]
south = [
    "south", "S'", "South", "SOUTH",
]
east = [
    "east", "E'", "East", "EAST",
]
west = [
    "west", "W'", "West", "WEST",
]

# Accepted spellings for each intercardinal direction.
northeast = [
    "north-east", "NE'", "north east",
    "NORTH-EAST", "North East", "NORTH EAST",
]
southeast = [
    "south-east", "SE'", "south east",
    "SOUTH-EAST", "South East", "SOUTH EAST",
]
northwest = [
    "north-west", "NW'", "north west",
    "NORTH-WEST", "North West", "NORTH WEST",
]
southwest = [
    "south-west", "SW'", "south west",
    "SOUTH-WEST", "South West", "SOUTH WEST",
]

# Words that denote the central part of a place.
center = [
    "center", "central", "downtown", "midtown",
]

# JSON dataset read by the batch loop further down in this file.
dataset_path = 'dataset/dataset_20.json'
def get_geojson(ent, arr, centroid):
    """Wrap one ring of coordinates in a single-feature GeoJSON collection.

    :param ent: value stored as the feature's ``id``.
    :param arr: ring of coordinates; stored (not copied) as the only ring
        of the feature's ``Polygon`` geometry.
    :param centroid: value stored under ``properties['centroid']``.
    :return: a ``FeatureCollection`` dict containing exactly one feature.
    """
    feature = {
        'type': 'Feature',
        'id': ent,
        'properties': {'centroid': centroid},
        'geometry': {'type': 'Polygon', 'coordinates': [arr]},
    }
    return {'type': 'FeatureCollection', 'features': [feature]}
def get_coordinates(ent):
    """Look up a place on Nominatim and return its first ring with bearings.

    NOTE: a later definition of ``get_coordinates`` in this file rebinds the
    name, so this version is only in effect until that definition runs.

    :param ent: free-text place name to search for.
    :return: ``(ring, centroid)`` where ``ring`` is the first element of the
        top result's ``geojson.coordinates`` with the bearing returned by
        ``geoutil.calculate_bearing(centroid, point)`` appended to every
        point, and ``centroid`` is ``(lon, lat)`` of the top result as floats.
    :raises requests.HTTPError: if Nominatim answers with an error status.
    :raises IndexError: if the search returns no results.
    """
    # Let requests build the query string so that spaces, '&', '#' and
    # non-ASCII characters in the place name are encoded correctly.
    query = {
        'q': ent,
        'polygon_geojson': 1,
        'accept-language': 'en',
        'format': 'jsonv2',
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15"
    }
    # TLS certificate verification is left enabled (the requests default) and
    # a timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params=query,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()
    if not results:
        # IndexError is what indexing the empty result list used to raise.
        raise IndexError(f"Nominatim returned no results for {ent!r}")

    top = results[0]
    # assumes the top result has a Polygon geometry, so coordinates[0] is a
    # ring of [lon, lat] points -- TODO confirm for MultiPolygon/Point results
    ring = top['geojson']['coordinates'][0]
    centroid = (float(top['lon']), float(top['lat']))
    for point in ring:
        bearing = geoutil.calculate_bearing(centroid, (point[0], point[1]))
        point.append(bearing)
    return ring, centroid
| # level3 | |
def get_directional_coordinates_by_angle(coordinates, centroid, direction, minimum, maximum):
    """Keep the points whose bearing from ``centroid`` lies in a given range.

    :param coordinates: iterable of points; only index 0 and 1 of each point
        are copied into the output.
    :param centroid: reference point passed to ``geoutil.calculate_bearing``.
    :param direction: direction label; when it is listed in
        ``geo_level1.east`` the range is treated as wrapping around, i.e. a
        point matches if its bearing is ``>= minimum`` OR ``<= maximum``.
        For every other direction the bearing must satisfy both bounds.
    :param minimum: lower bearing bound (inclusive).
    :param maximum: upper bearing bound (inclusive).
    :return: list of ``(p[0], p[1], bearing)`` tuples in input order.
    """
    def in_sector(bearing):
        if direction in geo_level1.east:
            return bearing >= minimum or bearing <= maximum
        return bearing >= minimum and bearing <= maximum

    selected = []
    for point in coordinates:
        bearing = geoutil.calculate_bearing(centroid, point)
        tagged = (point[0], point[1], bearing)
        if in_sector(bearing):
            selected.append(tagged)
    return selected
def get_level3(level3):
    """Split a distance expression such as ``"4 km"`` into number and unit.

    :param level3: text containing at least one run of ASCII digits and one
        run of ASCII letters.
    :return: ``(digits, unit)`` as strings: the first digit run and the
        first letter run found in ``level3``.
    :raises IndexError: if ``level3`` has no digit run or no letter run.

    NOTE(review): only the first run of digits is returned, so a decimal
    such as ``"2.5 km"`` yields ``"2"`` -- confirm whether decimals matter.
    """
    digit_runs = re.findall('[0-9]+', level3)
    letter_runs = re.findall('[A-Za-z]+', level3)
    return digit_runs[0], letter_runs[0]
def get_direction_coordinates(coordinates, centroid, level1):
    """Filter ``coordinates`` to the bearing range associated with ``level1``.

    :param coordinates: points to filter.
    :param centroid: reference point used for the bearing computation.
    :param level1: direction label passed to ``geo_level1.get_min_max``.
    :return: the filtered list of ``(x, y, bearing)`` tuples, or
        ``coordinates`` unchanged when ``geo_level1.get_min_max`` returns
        ``None`` for ``level1``.
    """
    bounds = geo_level1.get_min_max(level1)
    if bounds is None:
        return coordinates
    return get_directional_coordinates_by_angle(
        coordinates, centroid, level1, bounds[0], bounds[1]
    )
def sort_west(poly1, poly2, centroid):
    """Collect the points of two geometries whose bearing is within 157..202.

    Every coordinate ring of ``poly1`` and then of ``poly2`` is reversed and
    flattened; each point whose bearing from ``centroid`` (as returned by
    ``geoutil.calculate_bearing``) lies in the inclusive range 157..202 is
    kept.

    :param poly1: object for which ``shapely.geometry.mapping`` yields a
        feature collection (the caller in this file passes a buffered
        ``GeoSeries``).
    :param poly2: same kind of object as ``poly1``.
    :param centroid: reference point for the bearing computation.
    :return: list of ``(p[0], p[1], bearing)`` tuples, ``poly1`` matches
        first, then ``poly2`` matches.

    NOTE(review): despite the name no sorting is performed and the bearing
    window is hard-coded -- confirm that 157..202 is the intended window
    for "west".
    """
    ring_sets = [
        mapping(series)["features"][0]["geometry"]["coordinates"]
        for series in (poly1, poly2)
    ]

    # Flatten both geometries first, each ring in reversed point order.
    flattened = []
    for rings in ring_sets:
        points = []
        for ring in rings:
            points.extend(ring[::-1])
        flattened.append(points)

    matches = []
    for points in flattened:
        for point in points:
            bearing = geoutil.calculate_bearing(centroid, point)
            if bearing >= 157 and bearing <= 202:
                matches.append((point[0], point[1], bearing))
    return matches
def get_level3_coordinates(coordinates, level_3, level1):
    """Build the band of points lying roughly ``level_3`` away from a region.

    The region polygon is buffered twice (by ``0.0095 * kms`` and by
    ``0.013 * kms``, in the polygon's own coordinate units) and the
    difference of the two buffers is used as the band. The band's points
    are optionally restricted to the direction ``level1``.

    :param coordinates: pair ``(ring, centroid)``; ``coordinates[0]`` is
        passed to ``Polygon`` and ``coordinates[1]`` is the reference point
        for the direction filtering.
    :param level_3: distance expression such as ``"5 km"``.
    :param level1: direction label, or ``None`` to keep the whole band.
    :return: ``(coord, center)`` where ``coord`` is the selected point list
        and ``center`` is the centroid of those points, or
        ``coordinates[1]`` when no point was selected.
    """
    amount, unit = get_level3(level_3)
    kms = geoutil.get_kilometers(amount, unit)

    region = gpd.GeoSeries(Polygon(coordinates[0]))
    inner = region.buffer(0.0095 * kms, join_style=2)
    outer = region.buffer(0.013 * kms, join_style=2)
    band = outer.difference(inner)

    # Flatten every ring of the band, each ring in reversed point order.
    coord = []
    for ring in mapping(band)["features"][0]["geometry"]["coordinates"]:
        coord.extend(ring[::-1])

    if level1 is not None:
        coord = get_direction_coordinates(coord, coordinates[1], level1)
        if level1 in geo_level1.west:
            # West replaces the filtered result with sort_west's selection.
            coord = sort_west(outer, inner, coordinates[1])

    if not coord:
        # Fallback: no point selected, so report the original centre point.
        return coord, coordinates[1]

    # Geometric centre (centroid) of the selected points.
    center_point = MultiPoint(coord).centroid
    return coord, (center_point.x, center_point.y)
| # level 3 end | |
| # between | |
def get_between_coordinates(coordinates1, coordinates2):
    """Return a circle centred midway between two regions.

    The circle's area is derived from the average of the two region areas.

    :param coordinates1: boundary coordinates and centre point of the first
        region, as ``(ring, (x, y))``.
    :param coordinates2: boundary coordinates and centre point of the second
        region, as ``(ring, (x, y))``.
    :return: ``(circle_points, midpoint)``: 100 points on the circle and the
        circle's centre.
    """
    # Areas are taken directly from the raw coordinates, so this is only an
    # approximation (a projected CRS would be needed for real km^2).
    area1 = Polygon(coordinates1[0]).area
    area2 = Polygon(coordinates2[0]).area
    avg_area = (area1 + area2) / 2

    # Radius of the circle with that area, scaled by 111.32 km per degree.
    r_km = np.sqrt(avg_area / np.pi) * 111.32

    # Circle centre: midpoint of the two centre points.
    centre1, centre2 = coordinates1[1], coordinates2[1]
    midpoint = ((centre1[0] + centre2[0]) / 2, (centre1[1] + centre2[1]) / 2)

    # Approximate length of one degree on the ground:
    # latitude ~ 111.32 km, longitude ~ 111.32 km * cos(latitude).
    lat_km = 111.32
    lon_km = 111.32 * np.cos(np.radians(midpoint[1]))

    def point_at(theta):
        """Return the circle point at angle ``theta`` (degrees)."""
        theta_rad = np.radians(theta)
        d_lat = (np.sin(theta_rad) * r_km) / lat_km
        d_lon = (np.cos(theta_rad) * r_km) / lon_km
        return (midpoint[0] + d_lon, midpoint[1] + d_lat)

    # 100 samples from 0 to 360 degrees inclusive trace the circle.
    circle_points = [point_at(theta) for theta in np.linspace(0, 360, num=100)]
    return circle_points, midpoint
| # between end | |
def llmapi(text):
    """Ask the chat model to turn ``text`` into a list of positioning steps.

    This variant sends the Chinese system prompt.

    NOTE: a later definition of ``llmapi`` in this file rebinds the name, so
    this version is only in effect until that definition runs.

    :param text: natural-language instruction, sent as the user message.
    :return: the object decoded from the JSON found between the
        ``<<<JSON>>>`` and ``<<<END>>>`` markers of the reply.
    :raises ValueError: if the reply is empty or has no marker-delimited
        block; ``json.JSONDecodeError`` (a ``ValueError`` subclass) if the
        block is not valid JSON.
    """
    system_prompt = (
        "你是一个资深的地理学家,你的任务是通过给定的一段自然语言,来选择正确的定位函数顺序以及他们的输入。\n"
        "你能选择的定位函数有:\n"
        "1. 相对定位(Relative Positioning):输入为地点坐标,方位,距离。输出为距离‘距离’输入的地点坐标的‘方位’的坐标。\n"
        "2. 中间定位(Between Positioning):输入为两个地点的坐标,输出为两个地点坐标的中点。\n"
        "请先进行思维链(CoT)推理,并最终用 JSON 格式输出你的答案,用 `<<<JSON>>>` 和 `<<<END>>>` 包裹起来。\n"
        "请确保所有输入仅包含:地点名称(字符串)、索引(整数)、方位(字符串,必须是英文)或距离(字符串,带单位),不允许返回诸如 'Chatswood 南4 km的坐标' 这样的内容。\n"
        "每个步骤编号都有 id 记录,然后如果某个输入是之前步骤的输出,那么输入对应步骤的 id。\n"
        "所有方向必须使用英文(如 south, west, northeast, etc.)。\n"
        "示例输出:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    result = chat_completion.choices[0].message.content
    # The message content can be None; treat that like a reply without the
    # expected block instead of letting re.search fail with a TypeError.
    if result is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    # Accept any whitespace (or none) around the payload: the model does not
    # reliably emit exactly one newline after <<<JSON>>> and before <<<END>>>.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
def llmapi(text):
    """Ask the chat model to turn ``text`` into a list of positioning steps.

    This variant sends the English system prompt and prints the raw reply.

    :param text: natural-language instruction, sent as the user message.
    :return: the object decoded from the JSON found between the
        ``<<<JSON>>>`` and ``<<<END>>>`` markers of the reply.
    :raises ValueError: if the reply is empty or has no marker-delimited
        block; ``json.JSONDecodeError`` (a ``ValueError`` subclass) if the
        block is not valid JSON.
    """
    # NOTE(review): the "...complex situations." line has no trailing "\n",
    # so it is joined directly to "First, perform ..." in the prompt that is
    # sent. Left byte-identical here so model behaviour does not change.
    system_prompt = (
        "You are an experienced geographer. Your task is to determine the correct sequence of positioning functions and their inputs based on a given piece of natural language.\n"
        "The positioning functions you can choose from are:\n"
        "1. Relative Positioning: Inputs is (location coordinate or location name, direction, and distance). Outputs the coordinates that are in the given 'direction' and 'distance' from the input location.\n"
        "2. Between Positioning: Inputs is (location 1 coordinates or location 1 name, location 2 coordinates or location 2 name). Outputs the midpoint coordinate between the two locations.\n"
        "You can only use the given functions, and the inputs to the functions must obey the above properties. The given functions can be combined to solve complex situations."
        "First, perform chain-of-thought (CoT) reasoning, and finally output your answer in JSON format, wrapped between `<<<JSON>>>` and `<<<END>>>`.\n"
        "Make sure all inputs only include: location names (strings), step indices (integers), directions (strings, must be in English), or distances (strings with units). Do not return expressions like 'the coordinate 4 km south of Chatswood'.\n"
        "Each step must have an 'id'. If the input of a step is the output of a previous step, use that step’s 'id' as the input.\n"
        "All directions must be in English (e.g., south, west, northeast, etc.).\n"
        "Example output:\n"
        "<<<JSON>>>\n"
        "[{\"id\": 1, \"function\": \"Relative\", \"inputs\": [\"Chatswood\", \"south\", \"4 km\"]},"
        "{\"id\": 2, \"function\": \"Relative\", \"inputs\": [\"North Sydney\", \"west\", \"2 km\"]},"
        "{\"id\": 3, \"function\": \"Between\", \"inputs\": [1, 2]},"
        "{\"id\": 4, \"function\": \"Relative\", \"inputs\": [3, \"southwest\", \"5 km\"]}]\n"
        "<<<END>>>")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model=model,
    )
    result = chat_completion.choices[0].message.content
    print(result)
    # The message content can be None; treat that like a reply without the
    # expected block instead of letting re.search fail with a TypeError.
    if result is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    # Accept any whitespace (or none) around the payload: the model does not
    # reliably emit exactly one newline after <<<JSON>>> and before <<<END>>>.
    json_match = re.search(r'<<<JSON>>>\s*(.*?)\s*<<<END>>>', result, re.DOTALL)
    if json_match is None:
        raise ValueError("LLM 输出未包含预期的 JSON 格式数据。")
    return json.loads(json_match.group(1))
def get_coordinates(location):
    """Look up a place on Nominatim and return its first ring and centre.

    :param location: free-text place name to search for.
    :return: ``(coordinates, centroid)`` where ``coordinates`` is the first
        element of the top result's ``geojson.coordinates`` and ``centroid``
        is ``(lon, lat)`` of the top result as floats.
    :raises requests.HTTPError: if Nominatim answers with an error status.
    :raises IndexError: if the search returns no results.
    """
    # Let requests build the query string so that spaces, '&', '#' and
    # non-ASCII characters in the place name are encoded correctly.
    query = {
        'q': location,
        'polygon_geojson': 1,
        'accept-language': 'en',
        'format': 'jsonv2',
    }
    headers = {"User-Agent": "Mozilla/5.0"}
    # TLS certificate verification is left enabled (the requests default) and
    # a timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search.php',
        params=query,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()
    if not results:
        # IndexError is what indexing the empty result list used to raise.
        raise IndexError(f"Nominatim returned no results for {location!r}")

    top = results[0]
    # assumes the top result has a Polygon geometry, so coordinates[0] is a
    # ring of [lon, lat] points -- TODO confirm for MultiPolygon/Point results
    coordinates = top['geojson']['coordinates'][0]
    centroid = (float(top['lon']), float(top['lat']))
    return (coordinates, centroid)
def execute_steps(steps):
    """Run a parsed list of positioning steps and collect their results.

    Each step is a dict with the keys ``'id'``, ``'function'`` and
    ``'inputs'``. An integer input refers to the result of the step with
    that id; every other input is used as given. Place names (strings) in
    the location slots are geocoded with ``get_coordinates``.

    :param steps: iterable of step dicts, in execution order.
    :return: dict mapping each step id to that step's result.
    :raises KeyError: if a step refers to an id that has no result yet.
    :raises ValueError: if a step names a function other than
        ``"Relative"`` or ``"Between"``, or has the wrong number of inputs.
    """
    data = {}
    for step in steps:
        step_id = step['id']
        function = step['function']
        inputs = step['inputs']

        resolved_inputs = []
        for inp in inputs:
            if isinstance(inp, int):
                if inp not in data:
                    raise KeyError(
                        f"step {step_id!r} refers to step {inp!r}, "
                        "which has not produced a result"
                    )
                resolved_inputs.append(data[inp])
            else:
                resolved_inputs.append(inp)

        if function == "Relative":
            location, direction, distance = resolved_inputs
            if isinstance(location, str):
                location = get_coordinates(location)
            result = get_level3_coordinates(location, distance, direction)
            print(location, distance, direction, 'rrr')
            print(result)
        elif function == "Between":
            location1, location2 = resolved_inputs
            if isinstance(location1, str):
                location1 = get_coordinates(location1)
            if isinstance(location2, str):
                location2 = get_coordinates(location2)
            result = get_between_coordinates(location1, location2)
            print(location1, location2, 'bbb')
            print(result)
        else:
            # Silently skipping an unknown function would only surface later
            # as a missing result, so fail at the step that caused it.
            raise ValueError(
                f"step {step_id!r} uses unknown function {function!r}"
            )
        data[step_id] = result
    return data
# Example calls of the helpers defined above, kept commented out:
# a = get_coordinates('Burwood')
# a2 = get_coordinates('Glebe')
# b = get_level3_coordinates(a, '5 km', 'east')
# c = get_between_coordinates(a, a2)
# Full pipeline
# Default input
# NOTE: default_input_text is assigned several times in a row, so only the
# last assignment is in effect; the earlier ones are alternative sample
# instructions. The batch loop below reads its instructions from the dataset
# file and does not use this variable.
default_input_text = "在Chatswood南边4公里与North Sydney 东边2公里的中间的西南5公里。"
default_input_text = "你是一位规划师,正在为华盛顿州的一项新森林监测站选址。两个潜在的参考位置分别是雷尼尔山国家公园(Mount Rainier National Park)和北喀斯喀特国家公园(North Cascades National Park)。首先,你想在这两个国家公园之间找到一个中间点。接着,你希望在这个中间点与北喀斯喀特国家公园之间,再取一个中间位置,以便确定最终的建设候选地。"
default_input_text = "在Chatswood和North Sydney的中间靠近North Sydney的四分之一位置"
default_input_text = "Plan a trip that involves determining the midpoint between Paris and London, and then finding another midpoint between this location and Paris to identify potential stopovers during travel."
# default_input_text = "5km southwest of Chatswood, 4km south of Chatswood and 2km north of North Sydney."
# Load the dataset: a JSON list whose items carry an 'instruction' field.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)
answer_path = 'answer/GPT4o.json'
answer = []
for i in data:
    # Ask the model for the step list and attach it to the dataset item.
    parsed_steps = llmapi(i['instruction'])
    i["steps"] = parsed_steps
    # Re-read what has been saved so far so that every finished item is
    # persisted immediately. A missing, empty or undecodable answer file
    # simply means there are no earlier results; anything else (for example
    # a permission error) is a real problem and is allowed to propagate.
    try:
        with open(answer_path, "r", encoding="utf-8") as f:
            datapoint = json.load(f)
    except (FileNotFoundError, ValueError):
        datapoint = []
    datapoint.append(i)
    with open(answer_path, "w", encoding="utf-8") as f:
        json.dump(datapoint, f, ensure_ascii=False, indent=2)
# Format conversion
def write_custom_json(data, filename):
    """Write ``data`` as JSON with one step per line.

    Every item must provide ``"index"``, ``"instruction"`` and ``"steps"``;
    every step must provide ``"id"``, ``"function"`` and ``"inputs"``. All
    values are serialised with ``json.dumps`` so that strings containing
    quotes, backslashes or non-ASCII text still produce valid JSON.

    :param data: sequence of item dicts.
    :param filename: path of the file to (over)write, encoded as UTF-8.
    :raises KeyError: if an item or step lacks one of the required keys; in
        that case the file is left untouched.
    """
    def encode(value):
        """Serialise one value as JSON, keeping non-ASCII text readable."""
        return json.dumps(value, ensure_ascii=False)

    def format_step(step):
        """Render one step as a single-line JSON object."""
        return (
            f'{{"id": {encode(step["id"])}, '
            f'"function": {encode(step["function"])}, '
            f'"inputs": {encode(step["inputs"])}}}'
        )

    # Build the whole document in memory first: ``filename`` may be the very
    # file ``data`` was loaded from, so it must not be truncated before the
    # formatting is known to succeed.
    last = len(data) - 1
    parts = ["[\n"]
    for i, item in enumerate(data):
        parts.append(" {\n")
        parts.append(f' "index": {encode(item["index"])},\n')
        parts.append(f' "instruction": {encode(item["instruction"])},\n')
        parts.append(' "steps": [\n')
        parts.append(",\n".join(f" {format_step(step)}" for step in item["steps"]))
        parts.append("\n ]\n")
        parts.append(" }" + (",\n" if i < last else "\n"))
    parts.append("]\n")

    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(parts))
# Re-read the saved answers and rewrite the same file in the compact
# one-step-per-line layout produced by write_custom_json.
with open(answer_path, mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

write_custom_json(data, answer_path)