Cloud Firestore Enterprise edition in Native mode is now available! Learn more.

ค้นหาด้วยการฝังเวกเตอร์

หน้านี้จะแสดงวิธีใช้ Cloud Firestore เพื่อทำการค้นหาเวกเตอร์ K-Nearest Neighbor (KNN) โดยใช้เทคนิคต่อไปนี้

จัดเก็บค่าเวกเตอร์
สร้างและจัดการดัชนีเวกเตอร์ KNN
สร้างการค้นหา K-Nearest-Neighbor (KNN) โดยใช้การวัดระยะทางเวกเตอร์ที่รองรับอย่างใดอย่างหนึ่ง

ก่อนเริ่มต้น

คุณต้องสร้างการฝังเวกเตอร์ก่อนจึงจะจัดเก็บการฝังใน Cloud Firestore ได้ Cloud Firestore ไม่ได้สร้างการฝังให้ คุณสามารถใช้บริการ เช่น Vertex AI เพื่อสร้างค่าเวกเตอร์ เช่น การฝังข้อความจากข้อมูล Cloud Firestore จากนั้นคุณจะจัดเก็บการฝังเหล่านี้กลับไปในเอกสาร Cloud Firestore ได้

ดูข้อมูลเพิ่มเติมเกี่ยวกับ Embedding ได้ที่Embedding คืออะไร

ดูวิธีรับการฝังข้อความด้วย Vertex AI ได้ที่รับการฝังข้อความ

จัดเก็บการฝังเวกเตอร์

ตัวอย่างต่อไปนี้แสดงวิธีจัดเก็บการฝังเวกเตอร์ใน Cloud Firestore

การดำเนินการเขียนด้วยการฝังเวกเตอร์

ตัวอย่างต่อไปนี้แสดงวิธีจัดเก็บการฝังเวกเตอร์ในเอกสาร Cloud Firestore

Python

from google.cloud import firestore
from google.cloud.firestore_v1.vector import Vector

# Connect to Firestore and select the collection that receives the document.
firestore_client = firestore.Client()
collection = firestore_client.collection("coffee-beans")

# Wrap the embedding in Vector so the field is stored as a vector embedding.
doc = {
    "name": "Kahawa coffee beans",
    "description": "Information about the Kahawa coffee beans.",
    "embedding_field": Vector([0.18332680, 0.24160706, 0.3416704]),
}

collection.add(doc)
# Sample file: vector_search.py

Node.js

import {
  Firestore,
  FieldValue,
} from "@google-cloud/firestore";

// Client and collection handle used for the write below.
const db = new Firestore();
const coll = db.collection('coffee-beans');

// Document payload; the embedding is passed through FieldValue.vector().
const coffeeBean = {
  name: "Kahawa coffee beans",
  description: "Information about the Kahawa coffee beans.",
  embedding_field: FieldValue.vector([1.0, 2.0, 3.0]),
};
await coll.add(coffeeBean);

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// CoffeeBean is the document shape written to the "coffee-beans" collection
// by the samples. The struct tags give the Firestore field name used for
// each Go field.
type CoffeeBean struct {
	// Name is stored under the Firestore field "name".
	Name           string             `firestore:"name,omitempty"`
	// Description is stored under the Firestore field "description".
	Description    string             `firestore:"description,omitempty"`
	// EmbeddingField holds the vector embedding as float32 components and is
	// stored under the Firestore field "embedding_field".
	EmbeddingField firestore.Vector32 `firestore:"embedding_field,omitempty"`
	// Color is stored under the Firestore field "color".
	Color          string             `firestore:"color,omitempty"`
}

// storeVectors writes one CoffeeBean document, including its vector
// embedding, to a new document in the "coffee-beans" collection of the
// given project.
//
// A client-creation failure is returned wrapped; a failed write is reported
// on w and the error is returned to the caller unchanged.
func storeVectors(w io.Writer, projectID string) error {
	ctx := context.Background()

	fsClient, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer fsClient.Close()

	// Vector can be represented by Vector32 or Vector64
	bean := CoffeeBean{
		Name:           "Kahawa coffee beans",
		Description:    "Information about the Kahawa coffee beans.",
		EmbeddingField: []float32{1.0, 2.0, 3.0},
		Color:          "red",
	}

	docRef := fsClient.Collection("coffee-beans").NewDoc()
	_, err = docRef.Set(ctx, bean)
	if err == nil {
		return nil
	}

	fmt.Fprintf(w, "failed to upsert: %v", err)
	return err
}
vector_store.go

Java

import com.google.cloud.firestore.CollectionReference;
import com.google.cloud.firestore.DocumentReference;
import com.google.cloud.firestore.FieldValue;
import com.google.cloud.firestore.VectorQuery;

// Collection that receives the new document.
CollectionReference coll = firestore.collection("coffee-beans");

// Assemble the document fields; the embedding goes through FieldValue.vector().
double[] embedding = {1.0, 2.0, 3.0};
Map<String, Object> docData = new HashMap<>();
docData.put("name", "Kahawa coffee beans");
docData.put("description", "Information about the Kahawa coffee beans.");
docData.put("embedding_field", FieldValue.vector(embedding));

// add() returns a future; get() waits for the resulting document reference.
DocumentReference documentReference = coll.add(docData).get();

คำนวณการฝังเวกเตอร์ด้วย Cloud Functions

หากต้องการคำนวณและจัดเก็บการฝังเวกเตอร์ทุกครั้งที่มีการอัปเดตหรือสร้างเอกสาร คุณสามารถตั้งค่า Cloud Functions ได้โดยทำดังนี้

Python

@functions_framework.cloud_event
def store_embedding(cloud_event) -> None:
  """Computes and stores an embedding when a Firestore document changes.

  Args:
    cloud_event: The triggering CloudEvent; its ``data`` attribute holds the
      serialized Firestore document event.
  """
  # ParseFromString fills the message in place. Its return value is not the
  # parsed message, so the populated `firestore_payload` is what gets passed on.
  firestore_payload = firestore.DocumentEventData()
  firestore_payload._pb.ParseFromString(cloud_event.data)

  # NOTE(review): from_payload / calculate_embedding are defined elsewhere;
  # presumably they expect the parsed event -- confirm against their signatures.
  collection_id, doc_id = from_payload(firestore_payload)
  # Call a function to calculate the embedding
  embedding = calculate_embedding(firestore_payload)
  # Update the document; merge=True leaves the other fields untouched.
  doc = firestore_client.collection(collection_id).document(doc_id)
  doc.set({"embedding_field": embedding}, merge=True)

Node.js

/**
 * Recomputes a document's vector embedding whenever its `content` field
 * changes.
 *
 * A vector embedding will be computed from the value of the `content`
 * field. The vector value will be stored in the `embedding` field. The
 * field names `content` and `embedding` are arbitrary field names chosen
 * for this example.
 *
 * @param event - Firestore event whose `data.before` and `data.after` hold
 *   the document snapshots from before and after the write.
 * @returns A promise that resolves once the embedding has been written, or
 *   right away when there is nothing to update.
 */
async function storeEmbedding(event: FirestoreEvent<any>): Promise<void> {
  // Get the previous value of the document's `content` field.
  const previousDocumentSnapshot = event.data.before as QueryDocumentSnapshot;
  const previousContent = previousDocumentSnapshot.get("content");

  const currentDocumentSnapshot = event.data.after as QueryDocumentSnapshot;

  // The document was deleted: there is nothing to embed, and calling
  // update() on a document that no longer exists would reject.
  if (!currentDocumentSnapshot.exists) {
    return;
  }

  // Get the current value of the document's `content` field.
  const currentContent = currentDocumentSnapshot.get("content");

  // Don't update the embedding if the content field did not change
  if (previousContent === currentContent) {
    return;
  }

  // Call a function to calculate the embedding for the value of the
  // `content` field. `await` is a no-op for a synchronous helper and
  // unwraps the vector if the helper is asynchronous.
  const embeddingVector = await calculateEmbedding(currentContent);

  // Update the `embedding` field on the document.
  await currentDocumentSnapshot.ref.update({
    embedding: embeddingVector,
  });
}

Go

  // Not yet supported in the Go client library

Java

  // Not yet supported in the Java client library

สร้างและจัดการดัชนีเวกเตอร์

ก่อนที่จะทำการค้นหาเพื่อนบ้านที่ใกล้ที่สุดด้วยการฝังเวกเตอร์ได้ คุณต้องสร้างดัชนีที่สอดคล้องกัน ตัวอย่างต่อไปนี้แสดงวิธีสร้างและจัดการดัชนีเวกเตอร์ด้วย Google Cloud CLI และคอนโซล นอกจากนี้ คุณยังจัดการดัชนีเวกเตอร์ด้วย Firebase CLI และ Terraform ได้ด้วย

สร้างดัชนีเวกเตอร์

คอนโซล Google Cloud

วิธีสร้างดัชนีใหม่ด้วยตนเองจากคอนโซล Google Cloud

ในคอนโซล Google Cloud ให้ไปที่หน้าฐานข้อมูล
ไปที่ฐานข้อมูล
เลือกฐานข้อมูลที่ต้องการจากรายการฐานข้อมูล
ในเมนูการนำทาง ให้คลิกดัชนี แล้วคลิกแท็บด้วยตนเอง
คลิกสร้างดัชนี
หากต้องการจัดทำดัชนีฟิลด์เวกเตอร์สำหรับการค้นหาเวกเตอร์ ให้เลือกสร้างดัชนีเวกเตอร์
ป้อนรหัสคอลเล็กชัน ป้อนเส้นทางฟิลด์เวกเตอร์และจำนวนมิติข้อมูลการฝังเวกเตอร์ เพิ่มชื่อของช่องเพิ่มเติมที่ต้องการจัดทำดัชนีและโหมดดัชนีสำหรับแต่ละช่อง

คลิกบันทึกดัชนี

ดัชนีใหม่จะปรากฏในรายการดัชนีที่สร้างขึ้นด้วยตนเอง และ Cloud Firestore จะเริ่มสร้างดัชนี เมื่อสร้างดัชนีเสร็จแล้ว คุณจะเห็นเครื่องหมายถูกสีเขียวข้างดัชนี

gcloud

ก่อนสร้างดัชนีเวกเตอร์ ให้อัปเกรดเป็น Google Cloud CLI เวอร์ชันล่าสุดโดยทำดังนี้

# Update the installed Google Cloud CLI components to the latest version.
gcloud components update

หากต้องการสร้างดัชนีเวกเตอร์ ให้ใช้ gcloud firestore indexes composite create

# Create a vector index. Replace collection-group, vector-field,
# vector-configuration and database-id with your own values.
gcloud firestore indexes composite create \
--collection-group=collection-group \
--query-scope=COLLECTION \
--field-config field-path=vector-field,vector-config='vector-configuration' \
--database=database-id

โดยที่

collection-group คือรหัสของกลุ่มคอลเล็กชัน
vector-field คือชื่อของฟิลด์ที่มีการฝังเวกเตอร์
database-id คือรหัสของฐานข้อมูล
vector-configuration มีเวกเตอร์ dimension และประเภทดัชนี dimension เป็นจำนวนเต็มไม่เกิน 2048 ประเภทดัชนีต้องเป็น flat จัดรูปแบบการกำหนดค่าดัชนีดังนี้ {"dimension":"DIMENSION", "flat": "{}"}

ตัวอย่างต่อไปนี้สร้างดัชนีผสม ซึ่งรวมถึงดัชนีเวกเตอร์ สำหรับฟิลด์ vector-field และดัชนีจากน้อยไปมากสำหรับฟิลด์ color คุณใช้ดัชนีประเภทนี้เพื่อกรองข้อมูลล่วงหน้าก่อนการค้นหาเพื่อนบ้านที่ใกล้ที่สุดได้

# Create a composite index: an ascending index on the "color" field plus a
# flat vector index with dimension 1024 on vector-field.
gcloud firestore indexes composite create \
--collection-group=collection-group \
--query-scope=COLLECTION \
--field-config=order=ASCENDING,field-path="color" \
--field-config field-path=vector-field,vector-config='{"dimension":"1024", "flat": "{}"}' \
--database=database-id

แสดงรายการดัชนีเวกเตอร์ทั้งหมด

คอนโซล Google Cloud

ในคอนโซล Google Cloud ให้ไปที่หน้าฐานข้อมูล
ไปที่ฐานข้อมูล
เลือกฐานข้อมูลที่ต้องการจากรายการฐานข้อมูล
ในเมนูการนำทาง ให้คลิกดัชนี แล้วคลิกแท็บด้วยตนเอง

ตารางดัชนีจะแสดงดัชนีทั้งหมดของฐานข้อมูล ดัชนีเวกเตอร์มีฟิลด์เวกเตอร์ที่มีไอคอน

gcloud

วิธีแสดงรายการดัชนีทั้งหมดและเรียกข้อมูลรหัสดัชนี

# List the indexes of the database; the output is used to look up index IDs.
gcloud firestore indexes composite list --database=database-id

แทนที่ database-id ด้วยรหัสของฐานข้อมูล

คุณใช้รหัสดัชนีเพื่อดูรายละเอียดเพิ่มเติมเกี่ยวกับดัชนีได้ดังนี้

# Show the details of the index identified by index-id.
gcloud firestore indexes composite describe index-id --database=database-id

โดยที่

index-id คือรหัสของดัชนีที่จะอธิบาย
database-id คือรหัสของฐานข้อมูล

ลบดัชนีเวกเตอร์

คอนโซล Google Cloud

ในคอนโซล Google Cloud ให้ไปที่หน้าฐานข้อมูล
ไปที่ฐานข้อมูล
เลือกฐานข้อมูลที่ต้องการจากรายการฐานข้อมูล
ในเมนูการนำทาง ให้คลิกดัชนี แล้วคลิกแท็บด้วยตนเอง
ในรายการดัชนีที่สร้างขึ้นเอง ให้คลิกปุ่มเพิ่มเติมสำหรับดัชนีที่ต้องการลบ แล้วคลิกลบ
ยืนยันว่าคุณต้องการลบดัชนีนี้โดยคลิกลบดัชนีในการแจ้งเตือน

gcloud

# Delete the index identified by index-id.
gcloud firestore indexes composite delete index-id --database=database-id

โดยที่

index-id คือรหัสของดัชนีที่จะลบ ใช้ indexes composite list เพื่อดึงข้อมูลรหัสดัชนี
database-id คือรหัสของฐานข้อมูล

ทำการค้นหาแบบ Nearest Neighbor

คุณสามารถทำการค้นหาความคล้ายคลึงเพื่อค้นหาจุดข้อมูลที่อยู่ใกล้ที่สุดของการฝังเวกเตอร์ได้ การค้นหาความคล้ายคลึงต้องใช้ดัชนีเวกเตอร์ หากยังไม่มีดัชนี Cloud Firestore จะแนะนำดัชนีที่ควรสร้างโดยใช้ gcloud CLI

ตัวอย่างต่อไปนี้จะค้นหาจุดข้อมูลข้างเคียงที่อยู่ใกล้ที่สุด 10 จุดของเวกเตอร์คำค้นหา

Python

from google.cloud.firestore_v1.base_vector_query import DistanceMeasure
from google.cloud.firestore_v1.vector import Vector

collection = db.collection("coffee-beans")

# Requires a single-field vector index
# `limit` is the number of nearest neighbors to return; 10 matches the
# description that accompanies this example.
vector_query = collection.find_nearest(
    vector_field="embedding_field",
    query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=10,
)
# Sample file: vector_search.py

Node.js

import {
  Firestore,
  FieldValue,
  VectorQuery,
  VectorQuerySnapshot,
} from "@google-cloud/firestore";

// Requires a single-field vector index
// Vector to compare against the documents' `embedding_field` values.
const queryVector = [3.0, 1.0, 2.0];

// Ask for the 10 nearest neighbors by Euclidean distance.
const vectorQuery: VectorQuery = coll.findNearest({
  vectorField: 'embedding_field',
  queryVector,
  distanceMeasure: 'EUCLIDEAN',
  limit: 10,
});

// Run the query.
const vectorQuerySnapshot: VectorQuerySnapshot = await vectorQuery.get();

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// vectorSearchBasic runs a nearest-neighbor query on the "embedding_field"
// field of the "coffee-beans" collection and prints the "name" field of each
// returned document to w.
//
// It returns an error if the client cannot be created or if the query fails;
// a query failure is also reported on w.
func vectorSearchBasic(w io.Writer, projectID string) error {
	ctx := context.Background()

	// Create client
	client, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer client.Close()

	collection := client.Collection("coffee-beans")

	// Requires a vector index
	// https://firebase.google.com/docs/firestore/vector-search#create_and_manage_vector_indexes
	vectorQuery := collection.FindNearest("embedding_field",
		[]float32{3.0, 1.0, 2.0},
		// Limit: return the 10 nearest neighbors, matching the description
		// that accompanies this example.
		10,
		// More info: https://firebase.google.com/docs/firestore/vector-search#vector_distances
		firestore.DistanceMeasureEuclidean,
		nil)

	docs, err := vectorQuery.Documents(ctx).GetAll()
	if err != nil {
		fmt.Fprintf(w, "failed to get vector query results: %v", err)
		return err
	}

	for _, doc := range docs {
		fmt.Fprintln(w, doc.Data()["name"])
	}
	return nil
}
vector_search_basic.go

Java

import com.google.cloud.firestore.VectorQuery;
import com.google.cloud.firestore.VectorQuerySnapshot;

// Vector to compare against the documents' "embedding_field" values.
double[] queryVector = {3.0, 1.0, 2.0};
int limit = 10;

// Nearest-neighbor query using Euclidean distance.
VectorQuery vectorQuery =
        coll.findNearest(
                "embedding_field", queryVector, limit, VectorQuery.DistanceMeasure.EUCLIDEAN);

// Run the query and wait for its result.
VectorQuerySnapshot vectorQuerySnapshot = vectorQuery.get().get();

ระยะทางเวกเตอร์

การค้นหาเพื่อนบ้านที่ใกล้ที่สุดรองรับตัวเลือกต่อไปนี้สำหรับระยะทางเวกเตอร์

EUCLIDEAN: วัดระยะทางแบบยุคลิด (EUCLIDEAN) ระหว่างเวกเตอร์ ดูข้อมูลเพิ่มเติมได้ที่ Euclidean
COSINE: เปรียบเทียบเวกเตอร์ตามมุมระหว่างเวกเตอร์ ซึ่งช่วยให้คุณวัดความคล้ายคลึงที่ไม่ขึ้นอยู่กับขนาดของเวกเตอร์ได้ เราขอแนะนำให้ใช้ DOT_PRODUCT กับเวกเตอร์ที่ทำให้เป็นหน่วยแทนระยะทางโคไซน์ ซึ่งเทียบเท่ากันในทางคณิตศาสตร์และมีประสิทธิภาพดีกว่า ดูข้อมูลเพิ่มเติมได้ที่ ความคล้ายกันของโคไซน์
DOT_PRODUCT: คล้ายกับ COSINE แต่ได้รับผลกระทบจากขนาดของเวกเตอร์ ดูข้อมูลเพิ่มเติมได้ที่ ดอทโปรดักต์

เลือกการวัดระยะทาง

คุณสามารถเลือกการวัดระยะทางที่จะใช้ได้ โดยพิจารณาจากว่าการฝังเวกเตอร์ทั้งหมดได้รับการทำให้เป็นมาตรฐานแล้วหรือไม่ การฝังเวกเตอร์ที่ทำให้เป็นมาตรฐานแล้วจะมีขนาด (ความยาว) เท่ากับ 1.0

นอกจากนี้ หากคุณทราบว่าโมเดลได้รับการฝึกด้วยการวัดระยะทางใด ให้ใช้การวัดระยะทางนั้นเพื่อคำนวณระยะห่างระหว่างการฝังเวกเตอร์

ข้อมูลที่ปรับให้เป็นมาตรฐาน

หากคุณมีชุดข้อมูลที่การฝังเวกเตอร์ทั้งหมดได้รับการทำให้เป็นมาตรฐานแล้ว เมตริกการวัดระยะทางทั้ง 3 รายการจะให้ผลการค้นหาเชิงความหมายเดียวกัน กล่าวโดยสรุปคือ แม้ว่าการวัดระยะทางแต่ละครั้งจะแสดงค่าที่แตกต่างกัน แต่ค่าเหล่านั้นจะเรียงลำดับในลักษณะเดียวกัน เมื่อทำให้การฝังเป็นมาตรฐานแล้ว DOT_PRODUCT มักจะมีประสิทธิภาพด้านการคำนวณมากที่สุด แต่ความแตกต่างนั้นเล็กน้อยในกรณีส่วนใหญ่ อย่างไรก็ตาม หากแอปพลิเคชันของคุณมีความไวต่อประสิทธิภาพสูง DOT_PRODUCT อาจช่วยในการปรับแต่งประสิทธิภาพได้

ข้อมูลที่ไม่ได้ทำให้เป็นมาตรฐาน

หากคุณมีชุดข้อมูลที่ไม่ได้ทำให้การฝังเวกเตอร์เป็นมาตรฐาน การใช้ DOT_PRODUCT เป็นการวัดระยะทาง จึงไม่ถูกต้องตามหลักคณิตศาสตร์ เนื่องจากผลคูณจุดไม่ได้วัดระยะทาง ไม่ว่าจะเป็นการวัดระยะทาง COSINE หรือ EUCLIDEAN ก็จะให้ผลการค้นหาที่อาจดีกว่าการวัดระยะทางอื่นๆ ขึ้นอยู่กับวิธีสร้างการฝังและประเภทการค้นหาที่ต้องการ คุณอาจต้องทำการทดลองกับ COSINE หรือ EUCLIDEAN เพื่อพิจารณาว่าตัวเลือกใดเหมาะกับกรณีการใช้งานของคุณมากที่สุด

ไม่แน่ใจว่าข้อมูลเป็นข้อมูลที่ปรับให้เป็นมาตรฐานหรือไม่

หากไม่แน่ใจว่าข้อมูลเป็นมาตรฐานหรือไม่และต้องการใช้ DOT_PRODUCT เราขอแนะนำให้ใช้ COSINE แทน COSINE เหมือนกับ DOT_PRODUCT ที่มีการสร้างการทำให้เป็นมาตรฐานไว้ในตัว ระยะทางที่วัดโดยใช้ COSINE มีตั้งแต่ 0 ถึง 2 ผลลัพธ์ ที่ใกล้เคียงกับ 0 แสดงว่าเวกเตอร์มีความคล้ายคลึงกันมาก

กรองเอกสารล่วงหน้า

หากต้องการกรองเอกสารล่วงหน้าก่อนค้นหาเพื่อนบ้านที่ใกล้ที่สุด คุณสามารถรวมการค้นหาความคล้ายคลึงกับโอเปอเรเตอร์การค้นหาอื่นๆ ได้ รองรับฟิลเตอร์คอมโพสิต and และ or ดูข้อมูลเพิ่มเติมเกี่ยวกับตัวกรองฟิลด์ที่รองรับได้ที่โอเปอเรเตอร์การค้นหา

Python

from google.cloud.firestore_v1.base_vector_query import DistanceMeasure
from google.cloud.firestore_v1.vector import Vector

collection = db.collection("coffee-beans")

# Similarity search with pre-filter
# Requires a composite vector index
# Only documents whose `color` equals "red" are considered by the search.
vector_query = collection.where("color", "==", "red").find_nearest(
    vector_field="embedding_field",
    query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=5,
)
# Sample file: vector_search.py

Node.js

// Similarity search with pre-filter
// Requires composite vector index
// First narrow the collection to the documents whose color is "red" ...
const redBeans = coll.where("color", "==", "red");

// ... then search for the 5 nearest neighbors among them.
const queryVector = [3.0, 1.0, 2.0];
const preFilteredVectorQuery: VectorQuery = redBeans.findNearest({
  vectorField: "embedding_field",
  queryVector,
  distanceMeasure: "EUCLIDEAN",
  limit: 5,
});

const vectorQueryResults = await preFilteredVectorQuery.get();

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// vectorSearchPrefilter restricts the "coffee-beans" collection to documents
// whose "color" is "red", runs a nearest-neighbor query on "embedding_field"
// over that subset, and prints the "name" field of each result to w.
//
// It returns an error if the client cannot be created or if the query fails;
// a query failure is also reported on w.
func vectorSearchPrefilter(w io.Writer, projectID string) error {
	ctx := context.Background()

	fsClient, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer fsClient.Close()

	// Similarity search with pre-filter
	// Requires a composite vector index
	redBeans := fsClient.Collection("coffee-beans").Where("color", "==", "red")
	nearest := redBeans.FindNearest(
		"embedding_field",
		[]float32{3.0, 1.0, 2.0},
		5,
		// More info: https://firebase.google.com/docs/firestore/vector-search#vector_distances
		firestore.DistanceMeasureEuclidean,
		nil,
	)

	matches, err := nearest.Documents(ctx).GetAll()
	if err != nil {
		fmt.Fprintf(w, "failed to get vector query results: %v", err)
		return err
	}

	for i := range matches {
		fmt.Fprintln(w, matches[i].Data()["name"])
	}
	return nil
}
vector_search_prefilter.go

Java

import com.google.cloud.firestore.VectorQuery;
import com.google.cloud.firestore.VectorQuerySnapshot;

// Vector to compare against the documents' "embedding_field" values.
double[] queryVector = {3.0, 1.0, 2.0};

// Keep only documents whose color is "red", then search among them.
VectorQuery preFilteredVectorQuery =
        coll.whereEqualTo("color", "red")
                .findNearest(
                        "embedding_field",
                        queryVector,
                        /* limit */ 10,
                        VectorQuery.DistanceMeasure.EUCLIDEAN);

// Run the query and wait for its result.
VectorQuerySnapshot vectorQuerySnapshot = preFilteredVectorQuery.get().get();

ดึงข้อมูลระยะทางเวกเตอร์ที่คำนวณแล้ว

คุณเรียกข้อมูลระยะทางเวกเตอร์ที่คำนวณแล้วได้โดยกำหนดชื่อพร็อพเพอร์ตี้เอาต์พุต distance_result_field ในคำค้นหา FindNearest ดังที่แสดงในตัวอย่างต่อไปนี้

Python

from google.cloud.firestore_v1.base_vector_query import DistanceMeasure
from google.cloud.firestore_v1.vector import Vector

collection = db.collection("coffee-beans")

# `distance_result_field` names the output field that carries the computed
# distance on each returned document.
vector_query = collection.find_nearest(
    vector_field="embedding_field",
    query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=10,
    distance_result_field="vector_distance",
)

docs = vector_query.stream()

# Print each document ID together with its computed distance.
for doc in docs:
    print(f"{doc.id}, Distance: {doc.get('vector_distance')}")
# Sample file: vector_search.py

Node.js

// Vector to compare against the documents' `embedding_field` values.
const queryVector = [3.0, 1.0, 2.0];

// `distanceResultField` names the output field that carries the computed
// distance on each returned document.
const vectorQuery: VectorQuery = coll.findNearest({
  vectorField: 'embedding_field',
  queryVector,
  distanceMeasure: 'EUCLIDEAN',
  distanceResultField: 'vector_distance',
  limit: 10,
});

const snapshot: VectorQuerySnapshot = await vectorQuery.get();

// Print each document ID together with its computed distance.
snapshot.forEach((match) => {
  const distance = match.get('vector_distance');
  console.log(match.id, ' Distance: ', distance);
});

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// vectorSearchDistanceResultField runs a nearest-neighbor query on the
// "coffee-beans" collection, asking for the computed distance to be returned
// in the "vector_distance" field, and prints each result's "name" and
// distance to w.
//
// It returns an error if the client cannot be created or if the query fails;
// a query failure is also reported on w.
func vectorSearchDistanceResultField(w io.Writer, projectID string) error {
	ctx := context.Background()

	fsClient, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer fsClient.Close()

	// Name of the output field that carries the computed distance.
	opts := &firestore.FindNearestOptions{
		DistanceResultField: "vector_distance",
	}

	// Requires a vector index
	// https://firebase.google.com/docs/firestore/vector-search#create_and_manage_vector_indexes
	nearest := fsClient.Collection("coffee-beans").FindNearest(
		"embedding_field",
		[]float32{3.0, 1.0, 2.0},
		10,
		firestore.DistanceMeasureEuclidean,
		opts,
	)

	matches, err := nearest.Documents(ctx).GetAll()
	if err != nil {
		fmt.Fprintf(w, "failed to get vector query results: %v", err)
		return err
	}

	for _, match := range matches {
		fields := match.Data()
		fmt.Fprintf(w, "%v, Distance: %v\n", fields["name"], fields["vector_distance"])
	}
	return nil
}
vector_search_result_field.go

Java

import com.google.cloud.firestore.VectorQuery;
import com.google.cloud.firestore.VectorQueryOptions;
import com.google.cloud.firestore.VectorQuerySnapshot;

// Name of the output field that carries the computed distance.
VectorQueryOptions distanceOptions =
        VectorQueryOptions.newBuilder().setDistanceResultField("vector_distance").build();

double[] queryVector = {3.0, 1.0, 2.0};
VectorQuery vectorQuery =
        coll.findNearest(
                "embedding_field",
                queryVector,
                /* limit */ 10,
                VectorQuery.DistanceMeasure.EUCLIDEAN,
                distanceOptions);

// Run the query and wait for its result.
VectorQuerySnapshot vectorQuerySnapshot = vectorQuery.get().get();

// Print each document ID together with its computed distance.
vectorQuerySnapshot
        .getDocuments()
        .forEach(
                document ->
                        System.out.println(
                                document.getId() + " Distance: " + document.get("vector_distance")));

หากต้องการใช้ Field Mask เพื่อแสดงผลชุดย่อยของช่องเอกสารพร้อมกับ distanceResultField คุณต้องใส่ค่าของ distanceResultField ใน Field Mask ด้วย ดังที่แสดงในตัวอย่างต่อไปนี้

Python

# The field mask must list the distance result field ("vector_distance")
# as well, otherwise it is not part of the returned documents.
vector_query = collection.select(["color", "vector_distance"]).find_nearest(
    vector_field="embedding_field",
    query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=10,
    distance_result_field="vector_distance",
)
# Sample file: vector_search.py

Node.js

// Field mask: the distance result field ('vector_distance') is listed
// alongside the document fields that should be returned.
const maskedBeans = coll.select('name', 'description', 'vector_distance');

const queryVector = [3.0, 1.0, 2.0];
const vectorQuery: VectorQuery = maskedBeans.findNearest({
  vectorField: 'embedding_field',
  queryVector,
  distanceMeasure: 'EUCLIDEAN',
  distanceResultField: 'vector_distance',
  limit: 10,
});

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// vectorSearchDistanceResultFieldMasked runs a nearest-neighbor query on the
// "coffee-beans" collection with a field mask limited to "color" and the
// distance result field "vector_distance", and prints both values for each
// result to w.
//
// It returns an error if the client cannot be created or if the query fails;
// a query failure is also reported on w.
func vectorSearchDistanceResultFieldMasked(w io.Writer, projectID string) error {
	ctx := context.Background()

	fsClient, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer fsClient.Close()

	// The field mask lists the distance result field next to "color".
	masked := fsClient.Collection("coffee-beans").Select("color", "vector_distance")
	opts := &firestore.FindNearestOptions{
		DistanceResultField: "vector_distance",
	}

	// Requires a vector index
	// https://firebase.google.com/docs/firestore/vector-search#create_and_manage_vector_indexes
	nearest := masked.FindNearest(
		"embedding_field",
		[]float32{3.0, 1.0, 2.0},
		10,
		firestore.DistanceMeasureEuclidean,
		opts,
	)

	matches, err := nearest.Documents(ctx).GetAll()
	if err != nil {
		fmt.Fprintf(w, "failed to get vector query results: %v", err)
		return err
	}

	for _, match := range matches {
		fields := match.Data()
		fmt.Fprintf(w, "%v, Distance: %v\n", fields["color"], fields["vector_distance"])
	}
	return nil
}
vector_search_result_field_masked.go

Java

import com.google.cloud.firestore.VectorQuery;
import com.google.cloud.firestore.VectorQueryOptions;
import com.google.cloud.firestore.VectorQuerySnapshot;

// Name of the output field that carries the computed distance.
VectorQueryOptions distanceOptions =
        VectorQueryOptions.newBuilder().setDistanceResultField("vector_distance").build();

// The field mask lists the distance result field next to the document
// fields that should be returned.
double[] queryVector = {3.0, 1.0, 2.0};
VectorQuery vectorQuery =
        coll.select("name", "description", "vector_distance")
                .findNearest(
                        "embedding_field",
                        queryVector,
                        /* limit */ 10,
                        VectorQuery.DistanceMeasure.EUCLIDEAN,
                        distanceOptions);

// Run the query and wait for its result.
VectorQuerySnapshot vectorQuerySnapshot = vectorQuery.get().get();

// Print each document ID together with its computed distance.
vectorQuerySnapshot
        .getDocuments()
        .forEach(
                document ->
                        System.out.println(
                                document.getId() + " Distance: " + document.get("vector_distance")));

ระบุเกณฑ์ระยะทาง

คุณระบุเกณฑ์ความคล้ายกันที่จะแสดงเฉพาะเอกสารภายในเกณฑ์ได้ ลักษณะการทำงานของช่องเกณฑ์ขึ้นอยู่กับหน่วยวัดระยะทาง ที่คุณเลือก ดังนี้

ระยะทาง EUCLIDEAN และ COSINE จะจำกัดเกณฑ์ไว้ที่เอกสารซึ่งมี ระยะทางน้อยกว่าหรือเท่ากับเกณฑ์ที่ระบุ ค่าการวัดระยะทางเหล่านี้จะลดลงเมื่อเวกเตอร์มีความคล้ายคลึงกันมากขึ้น
ระยะทาง DOT_PRODUCT จะจำกัดเกณฑ์ไว้ที่เอกสารซึ่งมีระยะทางมากกว่าหรือเท่ากับเกณฑ์ที่ระบุ ระยะทางของผลคูณจุดจะเพิ่มขึ้นเมื่อเวกเตอร์มีความคล้ายกันมากขึ้น

ตัวอย่างต่อไปนี้แสดงวิธีระบุเกณฑ์ระยะทางเพื่อแสดงเอกสารที่ใกล้ที่สุดสูงสุด 10 รายการซึ่งอยู่ห่างกันไม่เกิน 4.5 หน่วยโดยใช้เมตริกระยะทาง EUCLIDEAN

Python

from google.cloud.firestore_v1.base_vector_query import DistanceMeasure
from google.cloud.firestore_v1.vector import Vector

collection = db.collection("coffee-beans")

# Return at most 10 documents, and only those whose Euclidean distance to
# the query vector does not exceed 4.5.
vector_query = collection.find_nearest(
    vector_field="embedding_field",
    query_vector=Vector([0.3416704, 0.18332680, 0.24160706]),
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=10,
    distance_threshold=4.5,
)

docs = vector_query.stream()

for doc in docs:
    print(f"{doc.id}")
# Sample file: vector_search.py

Node.js

// Vector to compare against the documents' `embedding_field` values.
const queryVector = [3.0, 1.0, 2.0];

// Return at most 10 documents, and only those whose Euclidean distance to
// the query vector does not exceed the threshold.
const vectorQuery: VectorQuery = coll.findNearest({
  vectorField: 'embedding_field',
  queryVector,
  distanceMeasure: 'EUCLIDEAN',
  distanceThreshold: 4.5,
  limit: 10,
});

const snapshot: VectorQuerySnapshot = await vectorQuery.get();

// Print the ID of every matching document.
snapshot.forEach((match) => {
  console.log(match.id);
});

Go

import (
	"context"
	"fmt"
	"io"

	"cloud.google.com/go/firestore"
)

// vectorSearchDistanceThreshold runs a nearest-neighbor query on the
// "coffee-beans" collection with a distance threshold of 4.5 and prints the
// "name" field of each returned document to w.
//
// It returns an error if the client cannot be created or if the query fails;
// a query failure is also reported on w.
func vectorSearchDistanceThreshold(w io.Writer, projectID string) error {
	ctx := context.Background()

	fsClient, err := firestore.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("firestore.NewClient: %w", err)
	}
	defer fsClient.Close()

	// The option takes a *float64, so the threshold needs an addressable value.
	threshold := 4.5
	opts := &firestore.FindNearestOptions{
		DistanceThreshold: &threshold,
	}

	// Requires a vector index
	// https://firebase.google.com/docs/firestore/vector-search#create_and_manage_vector_indexes
	nearest := fsClient.Collection("coffee-beans").FindNearest(
		"embedding_field",
		[]float32{3.0, 1.0, 2.0},
		10,
		firestore.DistanceMeasureEuclidean,
		opts,
	)

	matches, err := nearest.Documents(ctx).GetAll()
	if err != nil {
		fmt.Fprintf(w, "failed to get vector query results: %v", err)
		return err
	}

	for i := range matches {
		fmt.Fprintln(w, matches[i].Data()["name"])
	}
	return nil
}
vector_search_distance_threshold.go

Java

import com.google.cloud.firestore.VectorQuery;
import com.google.cloud.firestore.VectorQueryOptions;
import com.google.cloud.firestore.VectorQuerySnapshot;

// Only documents within this distance of the query vector are returned.
VectorQueryOptions thresholdOptions =
        VectorQueryOptions.newBuilder().setDistanceThreshold(4.5).build();

double[] queryVector = {3.0, 1.0, 2.0};
VectorQuery vectorQuery =
        coll.findNearest(
                "embedding_field",
                queryVector,
                /* limit */ 10,
                VectorQuery.DistanceMeasure.EUCLIDEAN,
                thresholdOptions);

// Run the query and wait for its result.
VectorQuerySnapshot vectorQuerySnapshot = vectorQuery.get().get();

// Print the ID of every matching document.
vectorQuerySnapshot.getDocuments().forEach(document -> System.out.println(document.getId()));

ข้อจำกัด

โปรดทราบข้อจำกัดต่อไปนี้ขณะทำงานกับการฝังเวกเตอร์

มิติข้อมูลการฝังสูงสุดที่รองรับคือ 2048 หากต้องการจัดเก็บการฝังที่มีขนาดใหญ่กว่านี้ ให้ใช้การลดมิติข้อมูล
จำนวนเอกสารสูงสุดที่จะแสดงจากคำค้นหาเพื่อนบ้านที่ใกล้ที่สุดคือ 1,000 รายการ
การค้นหาเวกเตอร์ไม่รองรับ Listener ของสแนปชอตแบบเรียลไทม์
เฉพาะไลบรารีของไคลเอ็นต์ Python, Node.js, Go และ Java เท่านั้นที่รองรับการค้นหาเวกเตอร์

ขั้นตอนถัดไป

อ่านเกี่ยวกับแนวทางปฏิบัติแนะนำสำหรับ Cloud Firestore
ทำความเข้าใจการอ่านและการเขียนที่ปรับขนาดได้