Spaces:

tjw
/

gauge

Runtime error

App Files Files Community

tjw commited on Nov 10, 2024

Commit

ba529ff

1 Parent(s): 5856612

init

Browse files

Files changed (5) hide show

aimodel.py +404 -0
environment.yml +435 -0
main.py +57 -0
readme.txt +5 -0
test_rect.py +141 -0

aimodel.py ADDED Viewed

	@@ -0,0 +1,404 @@

+# %%
+import matplotlib.style
+from transformers import AutoProcessor, AutoModelForCausalLM
+from PIL import Image
+import pickle
+import torch
+from pathlib import Path
+from PIL import Image
+from PIL import ImageDraw
+from IPython.display import display
+import numpy as np
+from collections import namedtuple
+from logging import getLogger
+logger = getLogger(__name__)
+# %%
+class Florence:
+    def __init__(self, model_id:str, hack=False):
+        if hack:
+            return
+        self.model = (
+            AutoModelForCausalLM.from_pretrained(
+                model_id, trust_remote_code=True, torch_dtype="auto"
+            )
+            .eval()
+            .cuda()
+        )
+        self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+        self.model_id = model_id
+    def run(self, img:Image, task_prompt:str, extra_text:str|None=None):
+        logger.debug(f"run {task_prompt} {extra_text}")
+        model, processor = self.model, self.processor
+        prompt = task_prompt + (extra_text if extra_text else "")
+        inputs = processor(text=prompt, images=img, return_tensors="pt").to(
+            "cuda", torch.float16
+        )
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            early_stopping=False,
+            do_sample=False,
+            num_beams=3,
+            #temperature=0.1,
+        )
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        parsed_answer = processor.post_process_generation(
+            generated_text,
+            task=task_prompt,
+            image_size=(img.width, img.height),
+        )
+        return parsed_answer
+def model_init(hack=False):
+    fl = Florence("microsoft/Florence-2-large", hack=hack)
+    fl_ft = Florence("microsoft/Florence-2-large-ft", hack=hack)
+    return fl, fl_ft
+#%%
+# Florence-2 task tokens, passed as `task_prompt` to Florence.run().
+TASK_OD = "<OD>"
+TASK_SEGMENTATION = '<REFERRING_EXPRESSION_SEGMENTATION>'
+# NOTE(review): TASK_CAPTION holds the same token as TASK_GROUNDING below;
+# confirm whether a separate caption token was intended.
+TASK_CAPTION = "<CAPTION_TO_PHRASE_GROUNDING>"
+TASK_OCR = "<OCR_WITH_REGION>"
+TASK_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+#%%
+# Raw model outputs for one image, filled in by get_ai_model_result():
+#   img             - the full input image
+#   img2            - img cropped to meter_bbox
+#   meter_bbox      - box returned by get_meter_bbox()
+#   needle_polygons - output of get_needle_polygons() on img2
+#   circle_polygons - output of get_circles() on img2
+#   ocr1, ocr2      - get_ocr() output of the `fl` and `fl_ft` models
+AIModelResult = namedtuple('AIModelResult',
+                         ['img', 'img2', 'meter_bbox', 'needle_polygons', 'circle_polygons', 'ocr1', 'ocr2'])
+# In-memory cache of AIModelResult objects, keyed by a string derived from
+# the image path; images passed as PIL objects are not cached.
+cached_results:dict[str, AIModelResult] = {}
+#%%
+def get_meter_bbox(fl:Florence, img:Image):
+    task_prompt, extra_text = TASK_GROUNDING, "a circular meter with white background"
+    parsed_answer = fl.run(img, task_prompt, extra_text)
+    assert len(parsed_answer) == 1
+    k,v = parsed_answer.popitem()
+    assert 'bboxes' in v
+    assert 'labels' in v
+    assert len(v['bboxes']) == 1
+    assert len(v['labels']) == 1
+    assert v['labels'][0] == 'a circular meter'
+    bbox = v['bboxes'][0]
+    return bbox
+def get_circles(fl:Florence, img2:Image, polygons:list):
+    img3 = Image.new('L', img2.size, color = 'black')
+    draw = ImageDraw.Draw(img3)
+    for polygon in polygons:
+        draw.polygon(polygon, outline='white', width=3, fill='white')
+    img2a = np.where(np.array(img3)[:,:,None]>0,  np.array(img2), 255)
+    img4 = Image.fromarray(img2a)
+    parsed_answer = fl.run(img4, TASK_SEGMENTATION, "a circle")
+    assert len(parsed_answer) == 1
+    k,v = parsed_answer.popitem()
+    assert 'polygons' in v
+    assert len(v['polygons']) == 1
+    return v['polygons'][0]
+def get_needle_polygons(fl:Florence, img2:Image):
+    parsed_answer = fl.run(img2, TASK_SEGMENTATION, "the long narrow black needle hand pass through the center of the cicular meter")
+    assert len(parsed_answer) == 1
+    k,v = parsed_answer.popitem()
+    assert 'polygons' in v
+    assert len(v['polygons']) == 1
+    needle_polygons = v['polygons'][0]
+    return needle_polygons
+def get_ocr(fl:Florence, img2:Image):
+    parsed_answer = fl.run(img2, TASK_OCR)
+    assert len(parsed_answer)==1
+    k,v = parsed_answer.popitem()
+    return v
+def get_ai_model_result(img:Image.Image|Path|str, fl:Florence, fl_ft:Florence):
+    if isinstance(img, Path):
+        key = img.parts[-1]
+    elif isinstance(img, str):
+        key = img.split('/')[-1]
+    else:
+        key = None
+    if key is not None and key in cached_results:
+        return cached_results[key]
+    if isinstance(img, (Path, str)):
+        img = Image.open(img)
+    meter_bbox = get_meter_bbox(fl, img)
+    img2 = img.crop(meter_bbox)
+    needle_polygons = get_needle_polygons(fl, img2)
+    result = AIModelResult(img, img2, meter_bbox, needle_polygons,
+                            get_circles(fl, img2, needle_polygons),
+                            get_ocr(fl, img2),
+                            get_ocr(fl_ft, img2)
+                            )
+    if key is not None:
+        cached_results[key] = result
+    return result
+#%%
+from skimage.measure import regionprops
+from skimage.measure import EllipseModel
+from skimage.draw import ellipse_perimeter
+def get_regionprops(polygons:list) -> regionprops:
+    coords = np.concatenate(polygons).reshape(-1, 2)
+    size = tuple( (coords.max(axis=0)+2).astype('int') )
+    img = Image.new('L', size, color = 'black')
+    # draw circle polygon
+    draw = ImageDraw.Draw(img)
+    for polygon in polygons:
+        draw.polygon(polygon, outline='white', width=1, fill='white')
+    # use skimage to find the mass center of the circle
+    circle_imga = (np.array(img)>0).astype(np.uint8)
+    property = regionprops(circle_imga)[0]
+    return property
+def estimate_ellipse(coords, enlarge_factor=1.0):
+    em = EllipseModel()
+    em.estimate(coords[:, ::-1])
+    y, x, a, b, theta = em.params
+    a, b = a*enlarge_factor, b*enlarge_factor
+    em_params = np.round([y,x, a, b]).astype('int')
+    c, r = ellipse_perimeter(*em_params, orientation=-theta)
+    return em_params, theta, (c, r)
+def estimate_line(coords):
+    lm = LineModelND()
+    lm.estimate(coords)
+    return lm.params
+#%%
+#%%
+from matplotlib import pyplot as plt
+import matplotlib
+from skimage.measure import LineModelND, ransac
+matplotlib.style.use('dark_background')
+def rotate_theta(theta):
+    return ((theta + 3*np.pi/2)%(2*np.pi))/(2*np.pi)*360
+# OCR label strings that read_meter() treats as kg/cm2 scale marks.
+kg_cm2_labels = list(map(str, [1,3,5,7,9,11]))
+# OCR label strings that read_meter() treats as psi scale marks (20, 40, ..., 160).
+psi_labels = list(map(str, range(20, 180, 20)))
+# lousy decoupling
+# Everything read_meter() computes, kept together so the visualization
+# functions can reuse it: the raw AIModelResult (`result`), the needle reading
+# in both units and its angle, the fitted needle line (`orign`, `direction`),
+# the meter `center`, the fitted angle->psi model (`lm`) with its `inliers`,
+# and the per-label boxes, centers, angles and values.
+MeterResult = namedtuple('MeterResult', [
+                                         'result',
+                                         'needle_psi',
+                                         'needle_kg_cm2',
+                                         'needle_theta',
+                                         'orign',
+                                         'direction',
+                                         'center',
+                                         'lm',
+                                         'inliers',
+                                         'kg_cm2_texts',
+                                         'psi_texts',
+                                         'kg_cm2_centers',
+                                         'psi_centers',
+                                         'kg_cm2_theta',
+                                         'psi_theta',
+                                         'kg_cm2_psi',
+                                         'psi'                                 ,
+                                         ])
+def read_meter(img:Image.Image|str|Path, fl, fl_ft):
+    # ai model results
+    result = get_ai_model_result(img, fl, fl_ft)
+    # needle direction
+    coords = np.concatenate(result.needle_polygons).reshape(-1, 2)
+    orign, direction = estimate_line(coords)
+    # calculate the meter center
+    circle_props = get_regionprops(result.circle_polygons)
+    center = circle_props.centroid[::-1]
+    # XXX: the needle direction is from center to orign
+    if (orign - center) @ direction < 0:
+        direction = -direction
+    # calculate the needle theta
+    needle_theta = rotate_theta(np.arctan2(direction[1], direction[0]))
+    # calulate ocr texts to find kg/cm2 and psi labels
+    ocr1, ocr2 = result.ocr1, result.ocr2
+    kg_cm2_texts = {}
+    psi_texts = {}
+    quad_boxes = ocr1['quad_boxes']+ocr2['quad_boxes']
+    labels = ocr1['labels']+ocr2['labels']
+    for qbox, label in zip(quad_boxes, labels):
+        if label in kg_cm2_labels:
+            kg_cm2_texts[int(label)]=qbox
+        if label in psi_labels:
+            psi_texts[int(label)]=qbox
+    # calculate the center of kg/cm2 and psi labels
+    kg_cm2_centers = np.array(list(kg_cm2_texts.values())).reshape(-1, 4, 2).mean(axis=1)
+    psi_centers = np.array(list(psi_texts.values())).reshape(-1, 4, 2).mean(axis=1)
+    # convert kg/cm2 and psi labels to polar coordinates, origin is the center of the meter
+    # the angle is in degree which is more intuitive
+    kg_cm2_coords = kg_cm2_centers - center
+    kg_cm2_theta = rotate_theta(np.arctan2(kg_cm2_coords[:, 1], kg_cm2_coords[:, 0]))
+    psi_coords = psi_centers - center
+    psi_theta = rotate_theta(np.arctan2(psi_coords[:, 1], psi_coords[:, 0]))
+    # convert kg_cm2 to psi for fitting a line model
+    kg_cm2 = np.array(list(kg_cm2_texts.keys()))
+    kg_cm2_psi = kg_cm2 * 14.223
+    # combine kg/cm2 and psi labels to fit a line model
+    psi = np.array(list(psi_texts.keys()))
+    Y = np.concatenate([kg_cm2_psi, psi])
+    X = np.concatenate([kg_cm2_theta, psi_theta])
+    data = np.stack([X, Y], axis=1)
+    # run ransac to robustly fit a line model
+    lm, inliers = ransac(data, LineModelND, min_samples=2,
+           residual_threshold=15,
+           max_trials=2)
+    # use the model to calculated the needle psi and kg/cm2
+    needle_psi = lm.predict(needle_theta)[1]
+    needle_kg_cm2 = needle_psi / 14.223
+    return MeterResult(result=result,
+                          needle_psi=needle_psi,
+                          needle_kg_cm2=needle_kg_cm2,
+                          needle_theta=needle_theta,
+                          orign=orign,
+                          direction=direction,
+                          center=center,
+                          lm=lm,
+                          inliers=data[inliers].T,
+                          kg_cm2_texts=kg_cm2_texts,
+                          psi_texts=psi_texts,
+                          kg_cm2_centers=kg_cm2_centers,
+                          psi_centers=psi_centers,
+                          kg_cm2_theta=kg_cm2_theta,
+                          psi_theta=psi_theta,
+                          kg_cm2_psi=kg_cm2_psi,
+                          psi=psi,
+    )
+def more_visualization_data(meter_result:MeterResult):
+    result = meter_result.result
+    center = meter_result.center
+    # following calculations are for visualization and debugging
+    # calculate the needle head(farest point from center)
+    needle_coordinates = np.concatenate(result.needle_polygons).reshape(-1, 2)
+    needle_length = np.linalg.norm(needle_coordinates - center,axis=1)
+    farest_idx = np.argmax(needle_length)
+    needle_head = needle_coordinates[farest_idx]
+    needle_head_length = needle_length[farest_idx]
+    direction = meter_result.direction * needle_head_length
+    # inliners data
+    inlier_theta, inlier_psi = meter_result.inliers
+    # predict psi from 0 to 360
+    predict_theta = np.linspace(0, 360, 100)
+    predict_psi = meter_result.lm.predict(predict_theta)[:, 1]
+    return inlier_theta, inlier_psi, predict_theta, predict_psi, needle_head, direction
+def visualization(meter_result:MeterResult):
+    result = meter_result.result
+    center = meter_result.center
+    needle_psi, needle_kg_cm2 = meter_result.needle_psi, meter_result.needle_kg_cm2
+    inlier_theta, inlier_psi, predict_theta, predict_psi, needle_head, direction = more_visualization_data(meter_result)
+    # drawing and visualization
+    draw = ImageDraw.Draw(result.img2.copy())
+    # draw needle polygons
+    for polygon in result.needle_polygons:
+        draw.polygon(polygon, outline='red', width=3)
+    # draw center circle
+    draw = ImageDraw.Draw(draw._image.convert('RGBA'))
+    draw2 = ImageDraw.Draw(Image.new('RGBA', draw._image.size, (0,0,0,0)))
+    for polygon in result.circle_polygons:
+        draw2.polygon(polygon, outline='purple', width=1, fill = (255,128,255,100))
+    img = Image.alpha_composite(draw._image, draw2._image)
+    draw = ImageDraw.Draw(img.convert('RGB'))
+    # draw needle direction
+    draw.line((center[0], center[1], center[0]+direction[0], center[1]+direction[1]), fill='yellow', width=3)
+    # draw a dot at center
+    draw.ellipse((center[0]-5, center[1]-5, center[0]+5, center[1]+5), outline='yellow', width=3)
+    # draw a dot at needle_head
+    draw.ellipse((needle_head[0]-5, needle_head[1]-5, needle_head[0]+5, needle_head[1]+5), outline='yellow', width=3)
+    for x,y in meter_result.kg_cm2_centers:
+        draw.ellipse((x-3, y-3, x+3, y+3), outline='blue', width=3)
+    for x,y in meter_result.psi_centers:
+        draw.ellipse((x-3, y-3, x+3, y+3), outline='green', width=3)
+    for label,quad_box in meter_result.kg_cm2_texts.items():
+        draw.polygon(quad_box, outline='blue', width=1)
+        draw.text((quad_box[0], quad_box[1]-10), str(label), fill='blue', anchor='ls')
+    for label,quad_box in meter_result.psi_texts.items():
+        draw.polygon(quad_box, outline='green', width=1)
+        draw.text((quad_box[0], quad_box[1]-10), str(label), fill='green', anchor='ls')
+    if len(meter_result.kg_cm2_centers) >4:
+        # the ellipse of kg/cm2 labels, currently only for visualization
+        em_params, theta, (c, r) = estimate_ellipse(meter_result.kg_cm2_centers)
+        y, x = em_params[:2]
+        draw.ellipse((x-5, y-5, x+5, y+5), outline='blue', width=1)
+        imga = np.array(draw._image)
+        imga[c,r] = (0, 0, 255)
+        draw = ImageDraw.Draw(Image.fromarray(imga))
+    if len(meter_result.psi_centers) >4:
+        # the ellipse of psi labels, currently only for visualization
+        em_params, theta, (c, r) = estimate_ellipse(meter_result.psi_centers)
+        draw.ellipse((x-5, y-5, x+5, y+5), outline='green', width=1)
+        imga = np.array(draw._image)
+        imga[c,r] = (0, 255, 0)
+        y, x = em_params[:2]
+        draw = ImageDraw.Draw(Image.fromarray(imga))
+    draw.text((needle_head[0]-10, needle_head[1]-10),
+              f'psi={needle_psi:.1f} kg_cm2={needle_kg_cm2:.2f}',anchor='ls',
+              fill='yellow')
+    plt.plot(predict_theta, predict_psi, color='red', alpha=0.5)
+    plt.plot(meter_result.kg_cm2_theta, meter_result.kg_cm2_psi, 'o', color='#77F')
+    plt.plot(meter_result.psi_theta, meter_result.psi, 'o', color='#7F7')
+    plt.plot(inlier_theta, inlier_psi, 'x', color='red', alpha=0.5)
+    plt.vlines(meter_result.needle_theta, 0, 160, colors='yellow', alpha=0.5)
+    plt.hlines(meter_result.needle_psi, 0, 360, colors='yellow', alpha=0.5)
+    plt.text(meter_result.needle_theta-20, meter_result.needle_psi-20,
+             f'psi={needle_psi:.1f} kg_cm2={needle_kg_cm2:.2f}', color='yellow')
+    plt.xlim(0, 360)
+    plt.ylim(0, 160)
+    return draw._image, plt.gcf()
+def clear_cache():
+    # Empty the in-memory result cache in place (the dict object is kept).
+    cached_results.clear()
+def save_cache():
+    pickle.dump(cached_results, open('cached_results.pkl', 'wb'))
+def load_cache():
+    global cached_results
+    cached_results = pickle.load(open('cached_results.pkl', 'rb'))
+#%%
+if __name__ == '__main__':
+    from io import BytesIO
+    fl, fl_ft = model_init(hack=False)
+    #load_cache()
+    clear_cache()
+    imgs = list(Path('images/good').glob('*.jpg'))#[-1:]
+    W, H = 640, 480
+    for img_fn in imgs:
+        print(img_fn)
+        meter_result = read_meter(img_fn, fl, fl_ft)
+        img, fig = visualization(meter_result)
+        # resize draw._image to fit WxH and keep aspect ratio
+        w, h = meter_result.result.img2.size
+        if w/W > h/H:
+            w, h = W, int(h*W/w)
+        else:
+            w, h = int(w*H/h), H
+        display(img.resize((w, h)))
+        # convert figure to PIL image using io.BytesIO
+        buf = BytesIO()
+        fig.savefig(buf, format='png')
+        buf.seek(0)
+        fig_img = Image.open(buf)
+        display(fig_img)
+        # clear plot
+        plt.clf()
+# %%

environment.yml ADDED Viewed

	@@ -0,0 +1,435 @@

+name: guage
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - aom=3.6.1=h59595ed_0
+  - binutils_impl_linux-64=2.43=h4bf12b8_2
+  - binutils_linux-64=2.43=h4852527_2
+  - blas=1.0=mkl
+  - blosc=1.21.6=hef167b5_0
+  - brotli=1.1.0=hb9d3cd8_2
+  - brotli-bin=1.1.0=hb9d3cd8_2
+  - brotli-python=1.1.0=py311hfdbb021_2
+  - brunsli=0.1=h9c3ff4c_0
+  - bzip2=1.0.8=h4bc722e_7
+  - c-blosc2=2.14.4=hb4ffafa_1
+  - ca-certificates=2024.8.30=hbcca054_0
+  - certifi=2024.8.30=pyhd8ed1ab_0
+  - cffi=1.17.1=py311hf29c0ef_0
+  - charls=2.4.2=h59595ed_0
+  - charset-normalizer=3.4.0=pyhd8ed1ab_0
+  - cpython=3.11.10=py311hd8ed1ab_3
+  - cuda-cccl=12.6.77=0
+  - cuda-cccl_linux-64=12.6.77=0
+  - cuda-command-line-tools=12.1.1=0
+  - cuda-compiler=12.6.2=0
+  - cuda-crt-dev_linux-64=12.6.20=0
+  - cuda-crt-tools=12.6.20=0
+  - cuda-cudart=12.1.105=0
+  - cuda-cudart-dev=12.1.105=0
+  - cuda-cudart-dev_linux-64=12.6.77=0
+  - cuda-cudart-static=12.6.77=0
+  - cuda-cudart-static_linux-64=12.6.77=0
+  - cuda-cudart_linux-64=12.6.77=0
+  - cuda-cuobjdump=12.6.77=0
+  - cuda-cupti=12.1.105=0
+  - cuda-cuxxfilt=12.6.77=0
+  - cuda-documentation=12.4.127=0
+  - cuda-driver-dev=12.6.77=0
+  - cuda-driver-dev_linux-64=12.6.77=0
+  - cuda-gdb=12.6.77=0
+  - cuda-libraries=12.1.0=0
+  - cuda-libraries-dev=12.6.2=0
+  - cuda-libraries-static=12.6.2=0
+  - cuda-nsight=12.6.77=0
+  - cuda-nvcc=12.6.20=0
+  - cuda-nvcc-dev_linux-64=12.6.20=0
+  - cuda-nvcc-impl=12.6.20=0
+  - cuda-nvcc-tools=12.6.20=0
+  - cuda-nvcc_linux-64=12.6.20=0
+  - cuda-nvdisasm=12.6.77=0
+  - cuda-nvml-dev=12.6.77=2
+  - cuda-nvprof=12.6.80=0
+  - cuda-nvprune=12.6.77=0
+  - cuda-nvrtc=12.1.105=0
+  - cuda-nvrtc-dev=12.1.105=0
+  - cuda-nvrtc-static=12.6.77=0
+  - cuda-nvtx=12.1.105=0
+  - cuda-nvvm-dev_linux-64=12.6.20=0
+  - cuda-nvvm-impl=12.6.20=0
+  - cuda-nvvm-tools=12.6.20=0
+  - cuda-nvvp=12.6.80=0
+  - cuda-opencl=12.6.77=0
+  - cuda-opencl-dev=12.6.77=0
+  - cuda-profiler-api=12.6.77=0
+  - cuda-runtime=12.1.0=0
+  - cuda-sanitizer-api=12.6.77=0
+  - cuda-toolkit=12.1.0=0
+  - cuda-tools=12.1.1=0
+  - cuda-version=12.6=3
+  - cuda-visual-tools=12.6.2=0
+  - dav1d=1.2.1=hd590300_0
+  - dbus=1.13.18=hb2f20db_0
+  - expat=2.6.4=h5888daf_0
+  - ffmpeg=4.4.0=h6987444_4
+  - filelock=3.16.1=pyhd8ed1ab_0
+  - font-ttf-dejavu-sans-mono=2.37=hab24e00_0
+  - font-ttf-inconsolata=3.000=h77eed37_0
+  - font-ttf-source-code-pro=2.038=h77eed37_0
+  - font-ttf-ubuntu=0.83=h77eed37_3
+  - fontconfig=2.15.0=h7e30c49_1
+  - fonts-conda-ecosystem=1=0
+  - fonts-conda-forge=1=0
+  - freetype=2.12.1=h267a509_2
+  - gcc_impl_linux-64=12.4.0=hb2e57f8_1
+  - gcc_linux-64=12.4.0=h6b7512a_5
+  - gds-tools=1.11.1.6=0
+  - gettext=0.22.5=he02047a_3
+  - gettext-tools=0.22.5=he02047a_3
+  - giflib=5.2.2=hd590300_0
+  - glib=2.82.2=h44428e9_0
+  - glib-tools=2.82.2=h4833e2c_0
+  - gmp=6.3.0=hac33072_2
+  - gmpy2=2.1.5=py311h0f6cedb_2
+  - gnutls=3.6.13=h85f3911_1
+  - gxx_impl_linux-64=12.4.0=h613a52c_1
+  - gxx_linux-64=12.4.0=h8489865_5
+  - h2=4.1.0=pyhd8ed1ab_0
+  - hpack=4.0.0=pyh9f0ad1d_0
+  - hyperframe=6.0.1=pyhd8ed1ab_0
+  - idna=3.10=pyhd8ed1ab_0
+  - imagecodecs=2024.1.1=py311hbe88301_6
+  - imageio=2.36.0=pyh12aca89_1
+  - importlib-metadata=8.5.0=pyha770c72_0
+  - intel-openmp=2022.0.1=h06a4308_3633
+  - jinja2=3.1.4=pyhd8ed1ab_0
+  - jxrlib=1.1=hd590300_3
+  - kernel-headers_linux-64=3.10.0=he073ed8_18
+  - lame=3.100=h166bdaf_1003
+  - lazy-loader=0.4=pyhd8ed1ab_1
+  - lazy_loader=0.4=pyhd8ed1ab_1
+  - lcms2=2.16=hb7c19ff_0
+  - ld_impl_linux-64=2.43=h712a8e2_2
+  - lerc=4.0.0=h27087fc_0
+  - libaec=1.1.3=h59595ed_0
+  - libasprintf=0.22.5=he8f35ee_3
+  - libasprintf-devel=0.22.5=he8f35ee_3
+  - libavif16=1.0.1=h87da1f6_2
+  - libblas=3.9.0=16_linux64_mkl
+  - libbrotlicommon=1.1.0=hb9d3cd8_2
+  - libbrotlidec=1.1.0=hb9d3cd8_2
+  - libbrotlienc=1.1.0=hb9d3cd8_2
+  - libcblas=3.9.0=16_linux64_mkl
+  - libcublas=12.1.0.26=0
+  - libcublas-dev=12.1.0.26=0
+  - libcublas-static=12.6.3.3=0
+  - libcufft=11.0.2.4=0
+  - libcufft-dev=11.0.2.4=0
+  - libcufft-static=11.3.0.4=0
+  - libcufile=1.11.1.6=0
+  - libcufile-dev=1.11.1.6=0
+  - libcufile-static=1.11.1.6=0
+  - libcurand=10.3.7.77=0
+  - libcurand-dev=10.3.7.77=0
+  - libcurand-static=10.3.7.77=0
+  - libcusolver=11.4.4.55=0
+  - libcusolver-dev=11.4.4.55=0
+  - libcusolver-static=11.7.1.2=0
+  - libcusparse=12.0.2.55=0
+  - libcusparse-dev=12.0.2.55=0
+  - libcusparse-static=12.5.4.2=0
+  - libdeflate=1.20=hd590300_0
+  - libdrm=2.4.123=hb9d3cd8_0
+  - libegl=1.7.0=ha4b6fd6_1
+  - libexpat=2.6.4=h5888daf_0
+  - libffi=3.4.2=h7f98852_5
+  - libgcc=14.2.0=h77fa898_1
+  - libgcc-devel_linux-64=12.4.0=ha4f9413_101
+  - libgcc-ng=14.2.0=h69a702a_1
+  - libgettextpo=0.22.5=he02047a_3
+  - libgettextpo-devel=0.22.5=he02047a_3
+  - libgfortran=14.2.0=h69a702a_1
+  - libgfortran5=14.2.0=hd5240d6_1
+  - libgl=1.7.0=ha4b6fd6_1
+  - libglib=2.82.2=h2ff4ddf_0
+  - libglvnd=1.7.0=ha4b6fd6_1
+  - libglx=1.7.0=ha4b6fd6_1
+  - libgomp=14.2.0=h77fa898_1
+  - libhwy=1.1.0=h00ab1b0_0
+  - libiconv=1.17=hd590300_2
+  - libidn2=2.3.7=hd590300_0
+  - libjpeg-turbo=3.0.0=hd590300_1
+  - libjxl=0.10.3=h66b40c8_0
+  - liblapack=3.9.0=16_linux64_mkl
+  - libnpp=12.0.2.50=0
+  - libnpp-dev=12.0.2.50=0
+  - libnpp-static=12.3.1.54=0
+  - libnsl=2.0.1=hd590300_0
+  - libnvfatbin=12.6.77=0
+  - libnvfatbin-dev=12.6.77=0
+  - libnvfatbin-static=12.6.77=0
+  - libnvjitlink=12.1.105=0
+  - libnvjitlink-dev=12.1.105=0
+  - libnvjitlink-static=12.6.77=0
+  - libnvjpeg=12.1.1.14=0
+  - libnvjpeg-dev=12.1.1.14=0
+  - libnvjpeg-static=12.3.3.54=0
+  - libnvvm-samples=12.1.105=0
+  - libpciaccess=0.18=hd590300_0
+  - libpng=1.6.44=hadc24fc_0
+  - libsanitizer=12.4.0=h46f95d5_1
+  - libsqlite=3.47.0=hadc24fc_1
+  - libstdcxx=14.2.0=hc0a3c3a_1
+  - libstdcxx-devel_linux-64=12.4.0=ha4f9413_101
+  - libstdcxx-ng=14.2.0=h4852527_1
+  - libtasn1=4.19.0=h166bdaf_0
+  - libtiff=4.6.0=h1dd3fc0_3
+  - libunistring=0.9.10=h7f98852_0
+  - libuuid=2.38.1=h0b41bf4_0
+  - libva=2.22.0=h8a09558_1
+  - libvpx=1.11.0=h9c3ff4c_3
+  - libwebp=1.4.0=h2c329e2_0
+  - libwebp-base=1.4.0=hd590300_0
+  - libxcb=1.17.0=h8a09558_0
+  - libxcrypt=4.4.36=hd590300_1
+  - libxkbcommon=1.7.0=h2c5496b_1
+  - libxml2=2.13.4=h064dc61_2
+  - libzlib=1.3.1=hb9d3cd8_2
+  - libzopfli=1.0.3=h9c3ff4c_0
+  - llvm-openmp=15.0.7=h0cdce71_0
+  - lz4-c=1.9.4=hcb278e6_0
+  - mkl=2022.1.0=hc2b9512_224
+  - mpc=1.3.1=h24ddda3_1
+  - mpfr=4.2.1=h90cbb55_3
+  - mpmath=1.3.0=pyhd8ed1ab_0
+  - ncurses=6.5=he02047a_1
+  - nettle=3.6=he412f7d_0
+  - networkx=3.4.2=pyhd8ed1ab_1
+  - nsight-compute=2024.3.2.3=0
+  - nspr=4.36=h5888daf_0
+  - nss=3.106=hdf54f9c_0
+  - numpy=1.26.4=py311h64a7726_0
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.5.2=h488ebb8_0
+  - openssl=3.3.2=hb9d3cd8_0
+  - p11-kit=0.24.1=hc5aa10d_0
+  - packaging=24.1=pyhd8ed1ab_0
+  - pcre2=10.44=hba22ea6_2
+  - pillow=10.4.0=py311h4aec55e_1
+  - pip=24.3.1=pyh8b19718_0
+  - pthread-stubs=0.4=hb9d3cd8_1002
+  - pycparser=2.22=pyhd8ed1ab_0
+  - pysocks=1.7.1=pyha2e5f31_6
+  - python=3.11.10=hc5c86c4_3_cpython
+  - python_abi=3.11=5_cp311
+  - pytorch=2.5.1=py3.11_cuda12.1_cudnn9.1.0_0
+  - pytorch-cuda=12.1=ha16c6d3_6
+  - pytorch-mutex=1.0=cuda
+  - pywavelets=1.7.0=py311h9f3472d_2
+  - pyyaml=6.0.2=py311h9ecbd09_1
+  - rav1e=0.6.6=he8a937b_2
+  - readline=8.2=h8228510_1
+  - requests=2.32.3=pyhd8ed1ab_0
+  - scikit-image=0.24.0=py311h7db5c69_3
+  - scipy=1.14.1=py311he9a78e4_1
+  - setuptools=75.3.0=pyhd8ed1ab_0
+  - snappy=1.2.1=ha2e4443_0
+  - svt-av1=1.7.0=h59595ed_0
+  - sysroot_linux-64=2.17=h4a8ded7_18
+  - tifffile=2024.9.20=pyhd8ed1ab_0
+  - tk=8.6.13=noxft_h4845f30_101
+  - torchaudio=2.5.1=py311_cu121
+  - torchtriton=3.1.0=py311
+  - torchvision=0.20.1=py311_cu121
+  - typing_extensions=4.12.2=pyha770c72_0
+  - urllib3=2.2.3=pyhd8ed1ab_0
+  - wayland=1.23.1=h3e06ad9_0
+  - wayland-protocols=1.37=hd8ed1ab_0
+  - wheel=0.45.0=pyhd8ed1ab_0
+  - x264=1!161.3030=h7f98852_1
+  - x265=3.5=h924138e_3
+  - xkeyboard-config=2.43=hb9d3cd8_0
+  - xorg-libx11=1.8.10=h4f16b4b_0
+  - xorg-libxau=1.0.11=hb9d3cd8_1
+  - xorg-libxdmcp=1.1.5=hb9d3cd8_0
+  - xorg-libxext=1.3.6=hb9d3cd8_0
+  - xorg-libxfixes=6.0.1=hb9d3cd8_0
+  - xorg-xorgproto=2024.1=hb9d3cd8_1
+  - xz=5.2.6=h166bdaf_0
+  - yaml=0.2.5=h7f98852_2
+  - zfp=1.0.1=h5888daf_2
+  - zipp=3.20.2=pyhd8ed1ab_0
+  - zlib-ng=2.0.7=h0b41bf4_0
+  - zstandard=0.23.0=py311hbc35293_1
+  - zstd=1.5.6=ha6fb4c9_0
+  - pip:
+      - accelerate==1.1.1
+      - aiofiles==23.2.1
+      - albucore==0.0.13
+      - albumentations==1.4.10
+      - annotated-types==0.7.0
+      - anyio==3.7.1
+      - argon2-cffi==23.1.0
+      - argon2-cffi-bindings==21.2.0
+      - arrow==1.3.0
+      - astor==0.8.1
+      - asttokens==2.4.1
+      - async-lru==2.0.4
+      - attrs==24.2.0
+      - azure-core==1.32.0
+      - azure-identity==1.19.0
+      - babel==2.16.0
+      - beautifulsoup4==4.12.3
+      - bleach==6.2.0
+      - click==8.1.7
+      - comm==0.2.2
+      - contourpy==1.3.0
+      - cryptography==43.0.3
+      - cycler==0.12.1
+      - cython==3.0.11
+      - debugpy==1.8.8
+      - decorator==5.1.1
+      - defusedxml==0.7.1
+      - dill==0.3.9
+      - distro==1.9.0
+      - easyocr==1.7.2
+      - einops==0.8.0
+      - executing==2.1.0
+      - fastapi==0.115.4
+      - fastjsonschema==2.20.0
+      - ffmpy==0.4.0
+      - fire==0.7.0
+      - flash-attn==2.6.3
+      - fonttools==4.54.1
+      - fqdn==1.5.1
+      - fsspec==2024.10.0
+      - gradio==5.5.0
+      - gradio-client==1.4.2
+      - h11==0.14.0
+      - httpcore==1.0.6
+      - httpx==0.27.2
+      - huggingface-hub==0.26.2
+      - imgaug==0.4.0
+      - ipykernel==6.29.5
+      - ipython==8.29.0
+      - isoduration==20.11.0
+      - jedi==0.19.1
+      - joblib==1.4.2
+      - json5==0.9.25
+      - jsonpointer==3.0.0
+      - jsonschema==4.23.0
+      - jsonschema-specifications==2024.10.1
+      - jupyter-client==8.6.3
+      - jupyter-core==5.7.2
+      - jupyter-events==0.10.0
+      - jupyter-lsp==2.2.5
+      - jupyter-server==2.14.2
+      - jupyter-server-terminals==0.5.3
+      - jupyterlab==4.3.0
+      - jupyterlab-pygments==0.3.0
+      - jupyterlab-server==2.27.3
+      - kiwisolver==1.4.7
+      - lmdb==1.5.1
+      - lxml==5.3.0
+      - markdown-it-py==3.0.0
+      - markupsafe==2.1.5
+      - matplotlib==3.9.2
+      - matplotlib-inline==0.1.7
+      - mdurl==0.1.2
+      - mistune==3.0.2
+      - msal==1.31.0
+      - msal-extensions==1.2.0
+      - nbclient==0.10.0
+      - nbconvert==7.16.4
+      - nbformat==5.10.4
+      - nest-asyncio==1.6.0
+      - ninja==1.11.1.1
+      - notebook-shim==0.2.4
+      - openai==1.3.5
+      - opencv-contrib-python==4.10.0.84
+      - opencv-python==4.10.0.84
+      - opencv-python-headless==4.10.0.84
+      - opt-einsum==3.3.0
+      - orjson==3.10.11
+      - overrides==7.7.0
+      - paddleocr==2.9.1
+      - paddlepaddle==2.6.2
+      - pandas==2.2.3
+      - pandocfilters==1.5.1
+      - parso==0.8.4
+      - pexpect==4.9.0
+      - platformdirs==4.3.6
+      - portalocker==2.10.1
+      - prometheus-client==0.21.0
+      - prompt-toolkit==3.0.48
+      - protobuf==5.28.3
+      - psutil==6.1.0
+      - ptyprocess==0.7.0
+      - pure-eval==0.2.3
+      - py-cpuinfo==9.0.0
+      - pyclipper==1.3.0.post6
+      - pydantic==2.9.2
+      - pydantic-core==2.23.4
+      - pydub==0.25.1
+      - pygments==2.18.0
+      - pyjwt==2.9.0
+      - pyparsing==3.2.0
+      - python-bidi==0.6.3
+      - python-dateutil==2.9.0.post0
+      - python-docx==1.1.2
+      - python-json-logger==2.0.7
+      - python-multipart==0.0.12
+      - pytz==2024.2
+      - pyzmq==26.2.0
+      - rapidfuzz==3.10.1
+      - referencing==0.35.1
+      - regex==2024.11.6
+      - rfc3339-validator==0.1.4
+      - rfc3986-validator==0.1.1
+      - rich==13.9.4
+      - rpds-py==0.21.0
+      - ruff==0.7.3
+      - safehttpx==0.1.1
+      - safetensors==0.4.5
+      - scikit-learn==1.5.2
+      - seaborn==0.13.2
+      - semantic-version==2.10.0
+      - send2trash==1.8.3
+      - shapely==2.0.6
+      - shellingham==1.5.4
+      - six==1.16.0
+      - sniffio==1.3.1
+      - soupsieve==2.6
+      - stack-data==0.6.3
+      - starlette==0.41.2
+      - supervision==0.18.0
+      - sympy==1.13.1
+      - termcolor==2.5.0
+      - terminado==0.18.1
+      - thop==0.1.1-2209072238
+      - threadpoolctl==3.5.0
+      - timm==1.0.11
+      - tinycss2==1.4.0
+      - tokenizers==0.20.3
+      - tomli==2.0.2
+      - tomlkit==0.12.0
+      - tornado==6.4.1
+      - tqdm==4.67.0
+      - traitlets==5.14.3
+      - transformers==4.46.2
+      - typer==0.13.0
+      - types-python-dateutil==2.9.0.20241003
+      - tzdata==2024.2
+      - ultralytics==8.1.24
+      - uri-template==1.3.0
+      - uvicorn==0.32.0
+      - wcwidth==0.2.13
+      - webcolors==24.8.0
+      - webencodings==0.5.1
+      - websocket-client==1.8.0
+      - websockets==12.0

main.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+#from florence import model_init, draw_image
+#from wikai import analyze_dial, ocr_and_od
+import matplotlib.pyplot as plt
+from aimodel import model_init, read_meter, visualization
+from test_rect import read_meter as read_meter_rect
+from PIL import Image
+import logging
+#logging.basicConfig(level=logging.DEBUG)
+print("Loading model...")
+fl, fl_ft = model_init(hack=False)
+def process_image(input_image:Image, meter_type:str):
+    if meter_type == "方形儀表":
+        value, img = read_meter_rect(input_image, fl, fl_ft)
+        return img, f"辨識結果: PA={value}", None
+    assert meter_type == "圓形儀表"
+    plt.clf()
+    print("process_image")
+    W, H = 640, 480
+    if input_image is None:
+        return None, None
+    meter_result  = read_meter(input_image, fl, fl_ft)
+    img, fig = visualization(meter_result)
+    return img, f"辨識結果: PSI={meter_result.needle_psi:.1f}  kg/cm²={meter_result.needle_kg_cm2:.2f} ", plt
+# UI: left column holds the buttons, the meter-type dropdown and the image
+# upload; right column holds the text, image and plot outputs.
+with gr.Blocks() as demo:
+    gr.Markdown("## 指針辨識系統\n請選擇儀表類型，上傳圖片，或點擊Submit")
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                clear_button = gr.ClearButton()
+                submit_button = gr.Button("Submit", variant="primary")
+                # NOTE(review): no `value=` is given, so the callback may
+                # receive no selection - confirm how that should be handled.
+                meter_type_dropdown = gr.Dropdown(choices=["圓形儀表", "方形儀表"], label="選擇選項")
+            image_input = gr.Image(type="pil", label="上傳圖片")
+        with gr.Column():
+            number_output = gr.Textbox(label="辨識結果", placeholder="辨識結果")
+            image_output = gr.Image(label="輸出圖片")
+            plot_output = gr.Plot(label="模型結果")
+    # NOTE(review): plot_output is not in this list, so the clear button
+    # does not reset the plot - confirm whether that is intended.
+    clear_button.add([image_input, image_output, number_output])
+    # run the reader as soon as an image is uploaded ...
+    image_input.upload(
+        fn=process_image,
+        inputs=[image_input, meter_type_dropdown],
+        outputs=[image_output, number_output, plot_output],
+        queue=False
+    )
+    # ... and again whenever Submit is clicked
+    submit_button.click(
+        fn=process_image,
+        inputs=[image_input, meter_type_dropdown],
+        outputs=[image_output, number_output, plot_output],
+    )
+demo.launch(debug=True)

readme.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+Use `environment.yml` to recreate the conda environment.
+Run the example with `python main.py` or `gradio main.py`.
+The `images/good` directory collects images that are known to be read well.

test_rect.py ADDED Viewed

	@@ -0,0 +1,141 @@

+# %%
+import spaces
+import matplotlib.style
+from transformers import AutoProcessor, AutoModelForCausalLM
+from PIL import Image
+import torch
+from pathlib import Path
+from PIL import Image
+from PIL import ImageDraw
+from IPython.display import display
+import numpy as np
+from collections import namedtuple
+import sys
+print(sys.version_info)
+#%%
+class Florence:
+    def __init__(self, model_id:str, hack=False):
+        if hack:
+            return
+        self.model = (
+            AutoModelForCausalLM.from_pretrained(
+                model_id, trust_remote_code=True, torch_dtype="auto"
+            )
+            .eval()
+            .cuda()
+        )
+        self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+        self.model_id = model_id
+    def run(self, img:Image, task_prompt:str, extra_text:str|None=None):
+        model, processor = self.model, self.processor
+        prompt = task_prompt + (extra_text if extra_text else "")
+        inputs = processor(text=prompt, images=img, return_tensors="pt").to(
+            "cuda", torch.float16
+        )
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            early_stopping=False,
+            do_sample=False,
+            num_beams=3,
+        )
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        parsed_answer = processor.post_process_generation(
+            generated_text,
+            task=task_prompt,
+            image_size=(img.width, img.height),
+        )
+        return parsed_answer
+def model_init():
+    fl = Florence("microsoft/Florence-2-large", hack=False)
+    fl_ft = Florence("microsoft/Florence-2-large-ft", hack=False)
+    return fl, fl_ft
+# Florence-2 task prompt tokens, passed as `task_prompt` to Florence.run.
+# In the code shown here only TASK_SEGMENTATION and TASK_OCR are referenced.
+TASK_OD = "<OD>"
+TASK_SEGMENTATION = '<REFERRING_EXPRESSION_SEGMENTATION>'
+# NOTE(review): TASK_CAPTION holds the same token as TASK_GROUNDING below;
+# a plain caption token may have been intended — confirm before using it.
+TASK_CAPTION = "<CAPTION_TO_PHRASE_GROUNDING>"
+TASK_OCR = "<OCR_WITH_REGION>"
+TASK_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+#%%
+from skimage.measure import LineModelND, ransac
+def get_polygons(fl:Florence, img2:Image, prompt):
+    parsed_answer = fl.run(img2, TASK_SEGMENTATION, prompt)
+    assert len(parsed_answer) == 1
+    k,v = parsed_answer.popitem()
+    assert 'polygons' in v
+    assert len(v['polygons']) == 1
+    polygons = v['polygons'][0]
+    return polygons
+def get_ocr(fl:Florence, img2:Image):
+    parsed_answer = fl.run(img2, TASK_OCR)
+    assert len(parsed_answer)==1
+    k,v = parsed_answer.popitem()
+    return v
+# Sample images iterated by main(); the glob runs when this module is
+# imported and is relative to the current working directory.
+imgs = list(Path('images/other').glob('*.jpg'))
+# Scale labels accepted from OCR, as strings: '0', '100', ..., '500'.
+meter_labels = list(map(str, range(0, 600, 100)))
+def read_meter(img, fl:Florence, fl_ft:Florence):
+    if isinstance(img, str) or isinstance(img, Path):
+        print(img)
+        img = Image.open(img)
+    red_polygons = get_polygons(fl, img, 'red triangle pointer')
+    # draw the rectangle
+    draw = ImageDraw.Draw(img)
+    ocr_text = {}
+    ocr1 = get_ocr(fl, img)
+    ocr2 = get_ocr(fl_ft, img)
+    quad_boxes = ocr1['quad_boxes']+ocr2['quad_boxes']
+    labels = ocr1['labels']+ocr2['labels']
+    for quad_box, label in zip(quad_boxes, labels):
+        if label in meter_labels:
+            ocr_text[int(label)] = quad_box
+    for label, quad_box in ocr_text.items():
+        draw.polygon(quad_box, outline='green', width=3)
+        draw.text((quad_box[0], quad_box[1]-10), str(label), fill='green', anchor='ls')
+    text_centers = np.array(list(ocr_text.values())).reshape(-1, 4, 2).mean(axis=1)
+    lm = LineModelND()
+    lm.estimate(text_centers)
+    orign, direction = lm.params
+    # project text centers to the line
+    text_centers_shifted = text_centers - orign
+    text_centers_norm = text_centers_shifted @ direction
+    lm2 = LineModelND()
+    I = np.array(list(ocr_text.keys()))
+    L = text_centers_norm
+    data = np.stack([I, L], axis=1)
+    lm2.estimate(data)
+    ls = lm2.predict(list(range(0, 600, 100)))[:, 1]
+    x0, y0 = ls[0] * direction + orign
+    x1, y1 = ls[-1] * direction + orign
+    draw.line((x0, y0, x1, y1), fill='yellow', width=3)
+    for l in ls:
+        x, y = l * direction + orign
+        draw.ellipse((x-5, y-5, x+5, y+5), outline='yellow', width=3)
+    red_coords = np.concatenate(red_polygons).reshape(-1, 2)
+    red_shifted = red_coords - orign
+    red_norm = red_shifted @ direction
+    red_l = red_norm.mean()
+    red_i = np.clip(lm2.predict_x([red_l]), 0, 500)
+    red_l = lm2.predict_y(red_i)[0]
+    red_center = red_l * direction + orign
+    draw.ellipse((red_center[0]-5, red_center[1]-5, red_center[0]+5, red_center[1]+5), outline='red', width=3)
+    return red_i[0], img
+@spaces.GPU
+def main():
+    fl, fl_ft = model_init()
+    for img_fn in imgs:
+        print(img_fn)
+        img = Image.open(img_fn)
+        red_i, img2 = read_meater(img, fl, fl_ft)
+        print(red_i)
+        display(img2)
+if __name__ == '__main__':
+    main()
+#%%