update-1
spark.sql("SET spark.sql.parser.quotedRegexColumnNames=true")
df.createOrReplaceTempView("table")
spark.sql("select `(account_id|credit_card_limit)?+.+` from table")
.printSchema()
/**
* root
* |-- credit_card_Number: long (nullable = true)
* |-- first_name: string (nullable = true)
* |-- last_name: string (nullable = true)
* |-- phone_number: integer (nullable = true)
* |-- amount: integer (nullable = true)
* |-- date: string (nullable = true)
* |-- shop: string (nullable = true)
* |-- transaction_code: string (nullable = true)
*/
original answer
alternative-
hive syntax
- The following query selects all columns except ds and hr.
- Java regex syntax
SELECT `(ds|hr)?+.+` FROM sales
Spark Syntax
python
df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["Col1", "Col2"])
df.select(df.colRegex("`(Col1)?+.+`")).show()
+----+
|Col2|
+----+
| 1|
| 2|
| 3|
+----+
scala
df.select(df.colRegex("`(account_id|credit_card_limit)?+.+`"))
.printSchema()
/**
* root
* |-- credit_card_Number: long (nullable = true)
* |-- first_name: string (nullable = true)
* |-- last_name: string (nullable = true)
* |-- phone_number: integer (nullable = true)
* |-- amount: integer (nullable = true)
* |-- date: string (nullable = true)
* |-- shop: string (nullable = true)
* |-- transaction_code: string (nullable = true)
*/
Another Approach
df.printSchema()
/**
* root
* |-- account_id: integer (nullable = true)
* |-- credit_card_Number: long (nullable = true)
* |-- credit_card_limit: integer (nullable = true)
* |-- first_name: string (nullable = true)
* |-- last_name: string (nullable = true)
* |-- phone_number: integer (nullable = true)
* |-- amount: integer (nullable = true)
* |-- date: string (nullable = true)
* |-- shop: string (nullable = true)
* |-- transaction_code: string (nullable = true)
*/
// hive syntax
// The following query selects all columns except ds and hr.
// SELECT `(ds|hr)?+.+` FROM sales
// Java regex syntax
df.selectExpr(df.columns.filter(_.matches("(account_id|credit_card_limit)?+.+")): _*)
.printSchema()
/**
* root
* |-- credit_card_Number: long (nullable = true)
* |-- first_name: string (nullable = true)
* |-- last_name: string (nullable = true)
* |-- phone_number: integer (nullable = true)
* |-- amount: integer (nullable = true)
* |-- date: string (nullable = true)
* |-- shop: string (nullable = true)
* |-- transaction_code: string (nullable = true)
*/
ref - hive LanguageManual