Create an object in the reader account, then use Dataflow or Cloud Functions to ingest data into that object.
For example, create the following table in the Google Cloud Console:
CREATE TABLE my_dataset.my_table ( id INT64, name STRING, age INT64 );
Then, in Dataflow or Cloud Functions, use the following code to ingest data into the table:
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

PROJECT_ID = 'my-project-id'
INPUT_SOURCE = 'gs://path/to/input/file'
DATASET_ID = 'my_dataset'
TABLE_ID = 'my_table'

def write_to_bigquery(row):
    # Import here so each Dataflow worker builds its own BigQuery client.
    from google.cloud import bigquery
    bigquery_client = bigquery.Client(project=PROJECT_ID)
    table_ref = f'{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}'
    # Stream the row into the table; insert_rows_json returns a list of row errors.
    errors = bigquery_client.insert_rows_json(
        table_ref, [row], ignore_unknown_values=True)
    if errors:
        print(errors)

options = PipelineOptions()
p = beam.Pipeline(options=options)

(p
 | 'ReadInputFile' >> beam.io.ReadFromText(INPUT_SOURCE)
 | 'ParseCSV' >> beam.Map(lambda x: x.split(','))
 | 'CreateRow' >> beam.Map(lambda x: {'id': int(x[0]), 'name': x[1], 'age': int(x[2])})
 | 'WriteToBigQuery' >> beam.Map(write_to_bigquery))

p.run().wait_until_finish()
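For larger loads, Beam's built-in beam.io.WriteToBigQuery transform handles batching and retries instead of a per-row Map, but the per-row insert above stays close to the original example. If the data arrives through Cloud Functions rather than a pipeline, the same streaming insert can sit inside an HTTP-triggered function. The sketch below is a minimal illustration only: the function name ingest_row and the assumed JSON payload with id, name, and age fields are assumptions, not part of the example above.

import functions_framework
from google.cloud import bigquery

PROJECT_ID = 'my-project-id'
DATASET_ID = 'my_dataset'
TABLE_ID = 'my_table'

# The client is created once per function instance and reused across invocations.
bigquery_client = bigquery.Client(project=PROJECT_ID)
TABLE_REF = f'{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}'

@functions_framework.http
def ingest_row(request):
    # Assumes a request body like {"id": 1, "name": "Alice", "age": 30} (illustrative).
    row = request.get_json(silent=True) or {}
    errors = bigquery_client.insert_rows_json(TABLE_REF, [row], ignore_unknown_values=True)
    if errors:
        return (str(errors), 500)
    return ('ok', 200)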